Skip to content

Commit ba97372

Browse files
committed
basic XS inference for ref-overlapping alns
1 parent 23c46b7 commit ba97372

File tree

3 files changed

+21
-25
lines changed

3 files changed

+21
-25
lines changed

gclib/GBam.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -388,9 +388,8 @@ switch (cop) {
388388

389389
char GBamRecord::spliceStrand() { // '+', '-' from the XS tag, or 0 if no XS tag
390390
char c=tag_char("XS");
391-
if (c) return c;
392-
else return '.';
393-
}
391+
return ((c=='+' || c=='-') ? c : '.');
392+
}
394393

395394
char* GBamRecord::sequence() { //user must free this after use
396395
char *s = (char*)bam1_seq(b);

rlink.h

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,8 @@ struct GReadAlnData {
428428
//GPVec< GVec<RC_ExonOvl> > g_exonovls; //>5bp overlaps with guide exons, for each read "exon"
429429
GReadAlnData(GBamRecord* bamrec=NULL, char nstrand=0, int num_hits=0,
430430
int hit_idx=0, TAlnInfo* tif=NULL):brec(bamrec), strand(nstrand),
431-
nh(num_hits), hi(hit_idx), juncs(true), tinfo(tif) { } //, g_exonovls(true)
431+
nh(num_hits), hi(hit_idx), juncs(true), tinfo(tif) { }
432+
void clear() { delete tinfo; }
432433
~GReadAlnData() { delete tinfo; }
433434
};
434435

@@ -526,18 +527,13 @@ struct BundleData {
526527
rc_tdata, rc_edata, rc_idata);
527528
}
528529
}
529-
/* after reference annotation was loaded
530-
void rc_finalize_refs() {
531-
if (rc_data==NULL) return;
532-
//rc_data->setupCov();
533-
}
534-
Not needed here, we update the coverage span as each transcript is added
535-
*/
530+
536531
void keepGuide(GffObj* scaff, GPVec<RC_TData>* rc_tdata=NULL,
537532
GPVec<RC_Feature>* rc_edata=NULL, GPVec<RC_Feature>* rc_idata=NULL);
538533

539-
//bool evalReadAln(GBamRecord& brec, char& strand, int nh); //, int hi);
540534
bool evalReadAln(GReadAlnData& alndata, char& strand);
535+
//update read alndata with info about reference overlaps
536+
//also updates the transcription strand if needed/possible
541537

542538
void Clear() {
543539
keepguides.Clear();

stringtie.cpp

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,6 @@ if (tstackSize<DEF_TSTACK_SIZE) defStackSize=DEF_TSTACK_SIZE;
406406
bool chr_changed=false;
407407
int pos=0;
408408
const char* refseqName=NULL;
409-
//char strand=0;
410409
char xstrand=0;
411410
int nh=1;
412411
int hi=0;
@@ -441,7 +440,8 @@ if (tstackSize<DEF_TSTACK_SIZE) defStackSize=DEF_TSTACK_SIZE;
441440
}
442441
if (xstrand=='.' && brec->exons.Count()>1) {
443442
no_xs++;
444-
continue; //skip spliced alignments lacking XS tag (e.g. HISAT alignments)
443+
//continue; //skip spliced alignments lacking XS tag (e.g. HISAT alignments) ?
444+
//this is changed in StringTie 2 !
445445
}
446446
if (refseqName==NULL) GError("Error: cannot retrieve target seq name from BAM record!\n");
447447
pos=brec->start; //BAM is 0 based, but GBamRecord makes it 1-based
@@ -461,10 +461,12 @@ if (tstackSize<DEF_TSTACK_SIZE) defStackSize=DEF_TSTACK_SIZE;
461461
if (alncounts.Count()<=gseq_id) {
462462
alncounts.Resize(gseq_id+1, 0);
463463
}
464-
else if (alncounts[gseq_id]>0) GError(ERR_BAM_SORT);
464+
else if (alncounts[gseq_id]>0)
465+
GError(ERR_BAM_SORT);
465466
prev_pos=0;
466467
}
467-
if (pos<prev_pos) GError(ERR_BAM_SORT);
468+
if (pos<prev_pos)
469+
GError(ERR_BAM_SORT);
468470
prev_pos=pos;
469471
if (skipGseq) continue;
470472
alncounts[gseq_id]++;
@@ -657,19 +659,18 @@ if (tstackSize<DEF_TSTACK_SIZE) defStackSize=DEF_TSTACK_SIZE;
657659
}
658660
} while (cend_changed);
659661
}
660-
} //adjusted currentend and checked for overlapping reference transcripts
662+
} //adjusted currentend ; selected overlapping reference transcripts
661663
GReadAlnData alndata(brec, 0, nh, hi, tinfo);
662664
bool ovlpguide=bundle->evalReadAln(alndata, xstrand);
663-
if(!eonly || ovlpguide) { // in eonly case consider read only if it overlaps guide
664-
//check for overlaps with ref transcripts which may set xstrand
665+
//overlaps with ref transcripts may set xstrand if needed
666+
if (!eonly || ovlpguide) {
667+
// in -e mode: a read alignment is discarded if it doesn't overlap a reference transcript!
665668
if (xstrand=='+') alndata.strand=1;
666669
else if (xstrand=='-') alndata.strand=-1;
667-
//GMessage("%s\t%c\t%d\thi=%d\n",brec->name(), xstrand, alndata.strand,hi);
668-
//countFragment(*bundle, *brec, hi,nh); // we count this in build_graphs to only include mapped fragments that we consider correctly mapped
669-
//fprintf(stderr,"fragno=%d fraglen=%lu\n",bundle->num_fragments,bundle->frag_len);if(bundle->num_fragments==100) exit(0);
670-
//if (!ballgown || ref_overlap)
671-
processRead(currentstart, currentend, *bundle, hashread, alndata);
672-
// *brec, strand, nh, hi);
670+
//this is changed in StringTie 2 ! (improved transcription strand guessing)
671+
bool XSmissing=(alndata.strand==0 && brec->exons.Count()>1);
672+
if (!XSmissing)
673+
processRead(currentstart, currentend, *bundle, hashread, alndata);
673674
}
674675
} //for each read alignment
675676

0 commit comments

Comments
 (0)