@@ -46,120 +46,53 @@ void rc_updateExonCounts(const RC_Feature* exon, int nh) {
4646 if (nh<=1 ) exon->ucount ++;
4747}
4848
49- bool BundleData::evalReadAln (GBamRecord& brec, char & strand, int nh) { // , int hi) {
49+ bool BundleData::evalReadAln (GBamRecord& brec, char & strand, int nh) {
5050 if (rc_data==NULL ) {
5151 return false ; // no ref transcripts available for this reads' region
5252 }
5353 if ((int )brec.end <rc_data->lmin || (int )brec.start >rc_data->rmax ) {
5454 return false ; // hit outside coverage area
5555 }
5656 if (rc_data->exons .Count ()==0 ) return false ;
57-
58- if (ballgown) {
59- if (rc_data->tdata .Count ()==0 ) return false ; // nothing to do without transcripts
60- // check this read alignment against ref exons and introns
61- /*
62- int gstart=brec.start; //alignment start position on the genome
63-
64- We NEVER update read-counting boundaries of the bundle based on reads - they should only be based on reference transcripts
65- if (rc_data->f_cov.size()==0 && gstart<rc_data->lmin) {
66- fprintf(stderr, "Warning: adjusting lmin coverage bundle from %d to %d !\n", int(rc_data->lmin), (int)gstart);
67- rc_data->lmin=gstart;
68- }
69- */
70- /*
71- int gpos=brec.start; //current genomic position
72- int rlen=0; //read length, obtained here from the cigar string
73- int segstart=gstart;
74- */
75- vector<RC_Seg> rsegs;
76- vector<RC_Seg> rintrons;
77- for (int i=0 ;i<brec.exons .Count ();i++) {
57+ if (ballgown && rc_data->tdata .Count ()==0 ) return false ;
58+ // nothing to do without transcripts
59+ // check this read alignment against ref exons and introns
60+ char strandbits=0 ;
61+ for (int i=0 ;i<brec.exons .Count ();i++) {
62+ if (ballgown)
7863 rc_data->updateCov (strand, nh, brec.exons [i].start , brec.exons [i].len ());
79- rsegs.push_back (RC_Seg (brec.exons [i].start , brec.exons [i].end ) );
80- if (i>0 ) {
81- // add intron
82- rintrons.push_back (RC_Seg (brec.exons [i-1 ].end +1 , brec.exons [i].start -1 ));
64+ GArray<RC_FeatOvl> exonOverlaps (true , true ); // overlaps sorted by decreasing length
65+ if (rc_data->findOvlExons (exonOverlaps, brec.exons [i].start ,
66+ brec.exons [i].end , strand)) {
67+ int max_ovl=exonOverlaps[0 ].ovlen ;
68+ if (max_ovl>=5 ) { // only count exon overlaps of at least 5bp
69+ for (int k=0 ;k<exonOverlaps.Count ();++k) {
70+ if (exonOverlaps[k].feature ->strand ==' +' ) strandbits |= 0x01 ;
71+ else if (exonOverlaps[k].feature ->strand ==' -' ) strandbits |= 0x02 ;
72+ if (max_ovl - exonOverlaps[k].ovlen > 5 )
73+ break ; // ignore overlaps shorter than max_overlap - 5bp
74+ // TODO: perhaps we could use a better approach for non-overlapping exons
75+ // spanned by this same read alignment
76+ // counting this overlap for multiple exons if it's similarly large
77+ // (in the shared region of overlapping exons)
78+ rc_updateExonCounts (exonOverlaps[k].feature , nh);
79+ }
80+ } // exon overlap >= 5
81+ } // exon overlap processing
82+ if (i>0 ) { // intron processing
83+ RC_Feature* ri=rc_data->findIntron (brec.exons [i-1 ].end +1 , brec.exons [i].start -1 , strand);
84+ if (ri) {
85+ ri->rcount ++;
86+ ri->mrcount += (nh > 1 ) ? (1.0 /nh) : 1 ;
87+ if (nh==1 ) ri->ucount ++;
8388 }
84- }
85- // now check rexons and rintrons with findExons() and findIntron()
86- for (size_t i=0 ;i<rintrons.size ();++i) {
87- /* RC_FeatIt ri=rc_data->findIntron(rintrons[i].l, rintrons[i].r, strand);
88- if (ri!=rc_data->introns.end()) {
89- (*ri).rcount++;
90- (*ri).mrcount += (nh > 1) ? (1.0/nh) : 1;
91- if (nh==1) (*ri).ucount++;
92- }
93- */
94- RC_Feature* ri=rc_data->findIntron (rintrons[i].l , rintrons[i].r , strand);
95- if (ri) {
96- ri->rcount ++;
97- ri->mrcount += (nh > 1 ) ? (1.0 /nh) : 1 ;
98- if (nh==1 ) ri->ucount ++;
99- }
100- } // for each intron
10189
102- char strandbits=0 ;
103- for (size_t i=0 ;i<rsegs.size ();++i) {
104- RC_FeatPtrSet ovlex=rc_data->findExons (rsegs[i].l , rsegs[i].r , strand);
105- if (ovlex.size ()==0 ) continue ;
106- if (ovlex.size ()>1 ) {
107- vector< pair<int , const RC_Feature*> > xovl; // overlapped exons sorted by decreasing overlap length
108- for (RC_FeatPtrSet::iterator ox=ovlex.begin ();ox != ovlex.end (); ++ox) {
109- int ovlen=(*ox)->ovlen (rsegs[i].l , rsegs[i].r );
110- if (ovlen>=5 )
111- xovl.push_back (pair<int , const RC_Feature*>(ovlen, *ox));
112- }
113- if (xovl.size ()>1 ) {
114- sort (xovl.begin (), xovl.end (), OvlSorter); // larger overlaps first
115- // update the counts only for ref exons with max overlap to this segment
116- int max_ovl=xovl.begin ()->first ;
117- for (vector<pair<int , const RC_Feature*> >::iterator xo=xovl.begin ();xo!=xovl.end ();++xo) {
118- if (xo->second ->strand ==' +' ) strandbits |= 0x01 ;
119- else if (xo->second ->strand ==' -' ) strandbits |= 0x02 ;
120- if (max_ovl - xo->first > 5 ) break ; // more than +5 bases coverage for the other exons
121- rc_updateExonCounts (xo->second , nh);
122- }
123- } else if (xovl.size () == 1 ) {
124- rc_updateExonCounts (xovl.begin ()->second , nh);
125- }
126- } else {
127- // 1 exon overlap only
128- int ovlen=(*ovlex.begin ())->ovlen (rsegs[i].l , rsegs[i].r );
129- if (ovlen>=5 ) {
130- if ((*ovlex.begin ())->strand ==' +' ) strandbits |= 0x01 ;
131- else if ((*ovlex.begin ())->strand ==' -' ) strandbits |= 0x02 ;
132- rc_updateExonCounts (*ovlex.begin (), nh);
133- }
134- }
135- } // for each read alignment "exon"
136- if (strand==' .' && strandbits && strandbits<3 ) {
137- strand = (strandbits==1 ) ? ' +' : ' -' ;
138- }
90+ } // intron processing
13991 }
140- else {
141- // don't keep track of ballgown coverage data, just check for exon overlaps
142- // TODO check and assign strand here if needed
143- if (strand==' .' ) {
144- vector< pair<int , const RC_Feature*> > xovl; // overlapped exons sorted by ovl len
145- for (int i=0 ;i<brec.exons .Count ();i++) {
146- RC_FeatPtrSet ovlex=rc_data->findExons (brec.exons [i].start , brec.exons [i].end , strand);
147- for (RC_FeatPtrSet::iterator ox=ovlex.begin ();ox != ovlex.end (); ++ox) {
148- int ovlen=(*ox)->ovlen (brec.exons [i].start , brec.exons [i].end );
149- if (ovlen>=5 )
150- xovl.push_back (pair<int , const RC_Feature*>(ovlen, *ox));
151- }
152- } // for each read alignment "exon"
153- char strandbits=0 ;
154- for (vector<pair<int , const RC_Feature*> >::iterator xo=xovl.begin ();xo!=xovl.end ();++xo) {
155- if (xo->second ->strand ==' +' ) strandbits |= 0x01 ;
156- else if (xo->second ->strand ==' -' ) strandbits |= 0x02 ;
157- }
158- if (strandbits && strandbits<3 ) {
159- strand = (strandbits==1 ) ? ' +' : ' -' ;
160- }
161- }
92+ if (strand==' .' && strandbits && strandbits<3 ) {
93+ strand = (strandbits==1 ) ? ' +' : ' -' ;
16294 }
95+
16396 return true ;
16497}
16598
0 commit comments