Skip to content

Commit ecd2e7d

Browse files
committed
more stringent recognition of transcripts (only exon-based entities are loaded)
1 parent 6ea88d3 commit ecd2e7d

3 files changed

Lines changed: 26 additions & 23 deletions

File tree

Makefile

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ INCDIRS := -I. -I${GDIR} -I${BAM}
88

99
CC := g++
1010

11-
1211
ifneq (,$(findstring nothreads,$(MAKECMDGOALS)))
1312
NOTHREADS=1
1413
endif
@@ -40,7 +39,6 @@ else
4039
EXE =
4140
endif
4241

43-
4442
BASEFLAGS := -Wall -Wextra ${INCDIRS} -fsigned-char -D_FILE_OFFSET_BITS=64 \
4543
-D_LARGEFILE_SOURCE -fno-strict-aliasing -fno-exceptions -fno-rtti
4644

@@ -87,7 +85,6 @@ else
8785
endif
8886
CFLAGS := -g -DDEBUG -D_DEBUG -DGDEBUG -fno-common -fstack-protector $(CFLAGS)
8987
LDFLAGS := -g -L${BAM}
90-
#LIBS := -Wl,-Bstatic -lasan -lubsan -Wl,-Bdynamic -ldl $(LIBS)
9188
LIBS := -lasan -lubsan -ldl $(LIBS)
9289
else
9390
ifneq (,$(filter %memtrace %memusage %memuse, $(MAKECMDGOALS)))
@@ -114,8 +111,6 @@ ifndef NOTHREADS
114111
OBJS += ${GDIR}/GThreads.o
115112
endif
116113

117-
118-
119114
OBJS += rlink.o tablemaker.o tmerge.o
120115

121116
all release static debug: stringtie${EXE}

gclib/gff.cpp

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,13 @@ GffLine::GffLine(GffReader* reader, const char* l): _parents(NULL), _parents_len
353353
is_gene=true;
354354
is_t_data=true; //because its name will be attached to parented transcripts
355355
}
356+
357+
//give up if weirdo features can be safely ignored
358+
if (reader->transcriptsOnly && !is_t_data) {
359+
return; //skipping unrecognized non-transcript features
360+
}
356361
char* Parent=NULL;
362+
357363
if (reader->is_gff3 || reader->gff_type==0) {
358364
ID=extractAttr("ID=",true);
359365
/*
@@ -474,9 +480,6 @@ GffLine::GffLine(GffReader* reader, const char* l): _parents(NULL), _parents_len
474480
}
475481
} //GFF3
476482
else { // GTF syntax
477-
if (reader->transcriptsOnly && !is_t_data) {
478-
return; //skipping unrecognized non-transcript features
479-
}
480483
if (is_gene) {
481484
reader->gtf_gene=true;
482485
ID=extractAttr("transcript_id", true, true); //Ensembl GTF might lack this
@@ -1607,21 +1610,30 @@ void GfList::finalize(GffReader* gfr, bool mergeCloseExons,
16071610
}
16081611

16091612
GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, bool noExonAttr) {
1610-
/* if (isGene()) {
1611-
if (children.Count()==0) {
1612-
isTranscript(true);
1613-
//some bacterial annotation, childless genes may be in fact transcripts
1613+
/*
1614+
if (isGene()) {
1615+
if (children.Count()==0) {
1616+
//isolated "gene"-only record (some bacterial/mitochondrial annotation, or pseudo-genes?)
1617+
isTranscript(true); //a terrible compromise; better to define exons explicitly if there really is an exon there
16141618
}
1615-
else
1616-
if (gfr->transcriptsOnly) {
1619+
1620+
else if (gfr->transcriptsOnly) {
1621+
//if we're interested only in transcripts, ignore gene entries
16171622
isDiscarded(true);
16181623
}
16191624
}
16201625
*/
1626+
if (isDiscarded()) return this; //don't care about "finalizing" this
1627+
1628+
if (exons.Count()==0 && isTranscript()) {
1629+
//add exon feature to "transcripts" missing it
1630+
addExon(this->start, this->end);
1631+
}
16211632
//always merge adjacent or overlapping segments
16221633
//but if mergeCloseExons then merge even when distance is up to 5 bases
1623-
if (gfr->transcriptsOnly && !(isTranscript() || (isGene() && children.Count()==0))) {
1624-
isDiscarded(true); //discard non-transcripts
1634+
if (gfr->transcriptsOnly && !isTranscript()) {
1635+
isDiscarded(true); //ignore non-transcripts, isolated genes etc.
1636+
return this;
16251637
}
16261638
if (ftype_id==gff_fid_transcript && CDstart>0) {
16271639
ftype_id=gff_fid_mRNA;
@@ -1660,7 +1672,7 @@ GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, b
16601672
this->start=exons.First()->start;
16611673
this->end=exons.Last()->end;
16621674
//also update the stats for the reference sequence
1663-
if (!this->isDiscarded()) { //collect stats about the underlying genomic sequence
1675+
//if (!isDiscarded()) { //collect stats about the underlying genomic sequence
16641676
if (gfr->gseqtable.Count()<=gseq_id) {
16651677
gfr->gseqtable.setCount(gseq_id+1);
16661678
}
@@ -1677,7 +1689,7 @@ GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, b
16771689
gsd->maxfeat_len=this->len();
16781690
gsd->maxfeat=this;
16791691
}
1680-
}
1692+
//}
16811693
uptr=NULL;
16821694
udata=0;
16831695
}
@@ -1710,10 +1722,6 @@ GffObj* GffObj::finalize(GffReader* gfr, bool mergeCloseExons, bool keepAttrs, b
17101722
}
17111723
if (attrs_discarded) exons[0]->attrs->Pack();
17121724
}
1713-
if (exons.Count()==0 && isTranscript()) {
1714-
//add exon feature to an exonless transcript
1715-
addExon(this->start, this->end);
1716-
}
17171725
return this;
17181726
}
17191727

stringtie.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include "proc_mem.h"
1212
#endif
1313

14-
#define VERSION "1.3.3b"
14+
#define VERSION "1.3.4"
1515

1616
//#define DEBUGPRINT 1
1717

0 commit comments

Comments
 (0)