Skip to content

Commit 8e49ff5

Browse files
committed
fix unstable sorting issue affecting read strand assignment in some loci
1 parent 3581669 commit 8e49ff5

5 files changed

Lines changed: 18 additions & 35 deletions

File tree

rlink.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7353,8 +7353,6 @@ bool guide_exon_overlap(GPVec<GffObj>& guides,int sno,uint start,uint end) {
73537353
return false;
73547354
}
73557355

7356-
//int build_graphs(int refstart, GList<CReadAln>& readlist,
7357-
// GList<CJunction>& junction, GPVec<GffObj>& guides, GVec<float>& bpcov, GList<CPrediction>& pred,bool fast) {
73587356
int build_graphs(BundleData* bdata, bool fast) {
73597357
int refstart = bdata->start;
73607358
GList<CReadAln>& readlist = bdata->readlist;

rlink.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,9 @@ struct GReadAlnData {
277277
int nh;
278278
int hi;
279279
GPVec<CJunction> juncs;
280-
GPVec< GVec<RC_ExonOvl> > g_exonovls; //>5bp overlaps with guide exons, for each read "exon"
280+
//GPVec< GVec<RC_ExonOvl> > g_exonovls; //>5bp overlaps with guide exons, for each read "exon"
281281
GReadAlnData(GBamRecord* bamrec=NULL, char nstrand=0, int num_hits=0, int hit_idx=0):brec(bamrec),
282-
strand(nstrand), nh(num_hits), hi(hit_idx), juncs(true), g_exonovls(true) { }
282+
strand(nstrand), nh(num_hits), hi(hit_idx), juncs(true) { } //, g_exonovls(true)
283283
};
284284

285285
struct CTCov { //covered transcript info

stringtie.cpp

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -567,26 +567,14 @@ if (ballgown)
567567
//if (ballgown && bundle->rc_data)
568568
//ref_overlap=
569569
GReadAlnData alndata(brec, 0, nh, hi);
570-
bundle->evalReadAln(alndata, xstrand); //xstrand, nh);
571-
if (xstrand=='+') alndata.strand=1;
570+
bundle->evalReadAln(alndata, xstrand); //xstrand, nh);
571+
if (xstrand=='+') alndata.strand=1;
572572
else if (xstrand=='-') alndata.strand=-1;
573-
countFragment(*bundle, *brec, hi);
573+
//GMessage("%s\t%c\t%d\n",brec->name(), xstrand, alndata.strand);
574+
countFragment(*bundle, *brec, hi);
574575
//if (!ballgown || ref_overlap)
575-
processRead(currentstart, currentend, *bundle, hashread, alndata);
576+
processRead(currentstart, currentend, *bundle, hashread, alndata);
576577
// *brec, strand, nh, hi);
577-
578-
//update current end to be at least as big as the start of the read pair in the fragment?? -> maybe not because then I could introduce some false positives with paired reads mapped badly
579-
580-
/*
581-
if(guides) { // I need to adjust end according to guides
582-
while( ng_end+1 < ng && (int)(*guides)[ng_end+1]->start<=currentend) {
583-
ng_end++;
584-
if(currentend < (int)(*guides)[ng_end]->end) {
585-
currentend=(*guides)[ng_end]->end;
586-
}
587-
}
588-
}
589-
*/
590578
} //for each read alignment
591579
if (guided && no_ref_used)
592580
GMessage("WARNING: no reference transcripts were found for the genomic sequences where reads were mapped!\n"

tablemaker.cpp

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,26 +77,21 @@ bool BundleData::evalReadAln(GReadAlnData& alndata, char& xstrand) {
7777
if (rc_data->findOvlExons(exonOverlaps, brec.exons[i].start,
7878
brec.exons[i].end, xstrand, mate_pos)) {
7979
int max_ovl=exonOverlaps[0].ovlen;
80-
alndata.g_exonovls.Add(new GVec<RC_ExonOvl>(exonOverlaps));
81-
//if (max_ovl>=5) { //only count exon overlaps of at least 5bp
80+
//alndata.g_exonovls.Add(new GVec<RC_ExonOvl>(exonOverlaps));
8281
for (int k=0;k<exonOverlaps.Count();++k) {
8382
//if (exonOverlaps[k].ovlen < 5) break; //ignore very short overlaps
84-
if (exonOverlaps[k].feature->strand=='+') strandbits |= 0x01;
85-
else if (exonOverlaps[k].feature->strand=='-') strandbits |= 0x02;
8683
if (k && (exonOverlaps[k].mate_ovl < exonOverlaps[0].mate_ovl
8784
|| exonOverlaps[k].ovlen+5<max_ovl) )
88-
break; //ignore overlaps shorter than max_overlap - 5bp
85+
break; //ignore further overlaps after a mate matched or if they are shorter than max_overlap-5
86+
if (exonOverlaps[k].feature->strand=='+') strandbits |= 0x01;
87+
else if (exonOverlaps[k].feature->strand=='-') strandbits |= 0x02;
8988
//TODO: perhaps we could use a better approach for non-overlapping ref exons
9089
// spanned by this same read alignment
9190
//counting this overlap for multiple exons if it's similarly large
9291
//(in the shared region of overlapping exons)
9392
rc_updateExonCounts(exonOverlaps[k], nh);
9493
}
95-
//} //exon overlap >= 5
9694
} //ref exon overlaps
97-
else { //no ref exon overlaps
98-
alndata.g_exonovls.Add(new GVec<RC_ExonOvl>((int)0));
99-
}
10095
if (i>0) { //intron processing
10196
int j_l=brec.exons[i-1].end+1;
10297
int j_r=brec.exons[i].start-1;
@@ -114,7 +109,6 @@ bool BundleData::evalReadAln(GReadAlnData& alndata, char& xstrand) {
114109
if (xstrand=='.' && strandbits && strandbits<3) {
115110
xstrand = (strandbits==1) ? '+' : '-';
116111
}
117-
118112
return true;
119113
}
120114

tablemaker.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,13 @@ struct RC_ExonOvl {
135135
bool operator<(const RC_ExonOvl& o) const {
136136
if (mate_ovl!=o.mate_ovl)
137137
return (mate_ovl>o.mate_ovl);
138-
if (ovlen==o.ovlen) {
139-
return (feature<o.feature) ;
140-
}
141-
else return (ovlen>o.ovlen);
138+
if (ovlen!=o.ovlen)
139+
return (ovlen>o.ovlen);
140+
if (feature->r-feature->l != o.feature->r-o.feature->l)
141+
return (feature->r-feature->l > o.feature->r-o.feature->l);
142+
if (feature->strand != o.feature->strand)
143+
return (feature->strand<o.feature->strand);
144+
return (feature->l<o.feature->l);
142145
} //operator <
143146
bool operator==(const RC_ExonOvl& o) const {
144147
return (mate_ovl==o.mate_ovl && ovlen==o.ovlen && feature==o.feature);

0 commit comments

Comments
 (0)