@@ -353,7 +353,13 @@ GffLine::GffLine(GffReader* reader, const char* l): _parents(NULL), _parents_len
353353 is_gene=true ;
354354 is_t_data=true ; // because its name will be attached to parented transcripts
355355 }
356+
357+ // give up if weirdo features can be safely ignored
358+ if (reader->transcriptsOnly && !is_t_data) {
359+ return ; // skipping unrecognized non-transcript features
360+ }
356361 char * Parent=NULL ;
362+
357363 if (reader->is_gff3 || reader->gff_type ==0 ) {
358364 ID=extractAttr (" ID=" ,true );
359365 /*
@@ -474,9 +480,6 @@ GffLine::GffLine(GffReader* reader, const char* l): _parents(NULL), _parents_len
474480 }
475481 } // GFF3
476482 else { // GTF syntax
477- if (reader->transcriptsOnly && !is_t_data) {
478- return ; // skipping unrecognized non-transcript features
479- }
480483 if (is_gene) {
481484 reader->gtf_gene =true ;
482485 ID=extractAttr (" transcript_id" , true , true ); // Ensembl GTF might lack this
@@ -1607,21 +1610,30 @@ void GfList::finalize(GffReader* gfr, bool mergeCloseExons,
16071610}
16081611
16091612GffObj* GffObj::finalize (GffReader* gfr, bool mergeCloseExons, bool keepAttrs, bool noExonAttr) {
1610- /* if (isGene()) {
1611- if (children.Count()==0) {
1612- isTranscript(true);
1613- //some bacterial annotation, childless genes may be in fact transcripts
1613+ /*
1614+ if (isGene()) {
1615+ if (children.Count()==0) {
1616+ //isolated "gene"-only record (some bacterial/mitochondrial annotation, or pseudo-genes?)
1617+ isTranscript(true); //a terrible compromise, better define exons if it's really an exon there
16141618 }
1615- else
1616- if (gfr->transcriptsOnly) {
1619+
1620+ else if (gfr->transcriptsOnly) {
1621+ //if we're interested only in transcripts, ignore gene entries
16171622 isDiscarded(true);
16181623 }
16191624 }
16201625 */
1626+ if (isDiscarded ()) return this ; // don't care about "finalizing" this
1627+
1628+ if (exons.Count ()==0 && isTranscript ()) {
1629+ // add exon feature to "transcripts" missing it
1630+ addExon (this ->start , this ->end );
1631+ }
16211632 // always merge adjacent or overlapping segments
16221633 // but if mergeCloseExons then merge even when distance is up to 5 bases
1623- if (gfr->transcriptsOnly && !(isTranscript () || (isGene () && children.Count ()==0 ))) {
1624- isDiscarded (true ); // discard non-transcripts
1634+ if (gfr->transcriptsOnly && !isTranscript ()) {
1635+ isDiscarded (true ); // ignore non-transcripts, isolated genes etc.
1636+ return this ;
16251637 }
16261638 if (ftype_id==gff_fid_transcript && CDstart>0 ) {
16271639 ftype_id=gff_fid_mRNA;
@@ -1660,7 +1672,7 @@ GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, b
16601672 this ->start =exons.First ()->start ;
16611673 this ->end =exons.Last ()->end ;
16621674 // also update the stats for the reference sequence
1663- if (!this -> isDiscarded ()) { // collect stats about the underlying genomic sequence
1675+ // if (!isDiscarded()) { //collect stats about the underlying genomic sequence
16641676 if (gfr->gseqtable .Count ()<=gseq_id) {
16651677 gfr->gseqtable .setCount (gseq_id+1 );
16661678 }
@@ -1677,7 +1689,7 @@ GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, b
16771689 gsd->maxfeat_len =this ->len ();
16781690 gsd->maxfeat =this ;
16791691 }
1680- }
1692+ // }
16811693 uptr=NULL ;
16821694 udata=0 ;
16831695 }
@@ -1710,10 +1722,6 @@ GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, b
17101722 }
17111723 if (attrs_discarded) exons[0 ]->attrs ->Pack ();
17121724 }
1713- if (exons.Count ()==0 && isTranscript ()) {
1714- // add exon feature to an exonless transcript
1715- addExon (this ->start , this ->end );
1716- }
17171725 return this ;
17181726}
17191727
0 commit comments