Skip to content

Commit d160c1d

Browse files
committed
unbundled guides are written to the output GTF for -e and --merge; unbundled genes to the -A file
1 parent ecaf22e commit d160c1d

1 file changed

Lines changed: 77 additions & 11 deletions

File tree

stringtie.cpp

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ char* sprintTime();
224224
void processBundle(BundleData* bundle);
225225
//void processBundle1stPass(BundleData* bundle); //two-pass testing
226226

227-
void writeUnbundledGuides(GVec<GRefData>& refdata, const FILE* fout, const FILE* gout);
227+
void writeUnbundledGuides(GVec<GRefData>& refdata, FILE* fout, FILE* gout=NULL);
228228

229229
#ifndef NOTHREADS
230230

@@ -658,11 +658,14 @@ if (ballgown)
658658
#ifdef B_DEBUG
659659
fclose(dbg_out);
660660
#endif
661-
//if (f_out && f_out!=stdout) fclose(f_out);
661+
if (mergeMode && guided )
662+
writeUnbundledGuides(refguides, f_out);
663+
662664
fclose(f_out);
663665
if (c_out && c_out!=stdout) fclose(c_out);
664666

665-
if(verbose) GMessage("Total number of records without the XS tag: %d\n",no_xs);
667+
if(verbose)
668+
GMessage("Total number of records without the XS tag: %d\n",no_xs);
666669

667670
if(!mergeMode) {
668671
if(verbose) {
@@ -1284,19 +1287,82 @@ int waitForData(BundleData* bundles) {
12841287
return bidx;
12851288
}
12861289

1287-
void writeUnbundledGuides(GVec<GRefData>& refdata, const FILE* fout, const FILE* gout) {
1290+
void writeUnbundledGuides(GVec<GRefData>& refdata, FILE* fout, FILE* gout) {
12881291
for (int g=0;g<refdata.Count();++g) {
12891292
GRefData& crefd=refdata[g];
12901293
if (crefd.rnas.Count()==0) continue;
1291-
GHash<CGene> geneabs; //gene_id abundances (all zero), accumulating coords
1292-
for (int i=0;i<crefd.rnas.Count();++i) {
1293-
GffObj &t = *crefd.rnas[i];
1294+
GHash<CGene> geneabs;
1295+
//gene_id abundances (0), accumulating coords
1296+
for (int m=0;m<crefd.rnas.Count();++m) {
1297+
GffObj &t = *crefd.rnas[m];
12941298
RC_TData &td = *(RC_TData*) (t.uptr);
12951299
if (td.in_bundle) continue;
1296-
//TODO: write these guides to output
1297-
// for -e and --merge
1298-
// and collect gene_id coords
1299-
1300+
//write these guides to output
1301+
//for --merge and -e
1302+
if (mergeMode || eonly) {
1303+
fprintf(fout, "%s\t%s\ttranscript\t%d\t%d\t.\t%c\t.\ttranscript_id \"%s\";",
1304+
crefd.gseq_name, t.getTrackName(), t.start, t.end, t.strand, t.getID());
1305+
if (t.getGeneID())
1306+
fprintf(fout, " gene_id \"%s\";", t.getGeneID());
1307+
if (eonly) {
1308+
if (t.getGeneName())
1309+
fprintf(fout, " ref_gene_name \"%s\";", t.getGeneName());
1310+
fprintf(fout, " cov \"0.0\"; FPKM \"0.0\"; TPM \"0.0\";");
1311+
}
1312+
else { //merge_mode
1313+
if (t.getGeneName())
1314+
fprintf(fout, " gene_name \"%s\";", t.getGeneName());
1315+
if (t.getGeneID())
1316+
fprintf(fout, " ref_gene_id \"%s\";", t.getGeneID());
1317+
}
1318+
fprintf(fout, "\n");
1319+
for (int e=0;e<t.exons.Count();++e) {
1320+
fprintf(fout,"%s\t%s\texon\t%d\t%d\t.\t%c\t.\ttranscript_id \"%s\"; exon_number \"%d\";",
1321+
crefd.gseq_name, t.getTrackName(), t.exons[e]->start, t.exons[e]->end, t.strand,
1322+
t.getID(), e+1);
1323+
if (t.getGeneID())
1324+
fprintf(fout, " gene_id \"%s\";", t.getGeneID());
1325+
if (eonly) {
1326+
if (t.getGeneName())
1327+
fprintf(fout, " ref_gene_name \"%s\";", t.getGeneName());
1328+
fprintf(fout, " cov \"0.0\";");
1329+
}
1330+
else { //mergeMode
1331+
if (t.getGeneName())
1332+
fprintf(fout, " gene_name \"%s\";", t.getGeneName());
1333+
if (t.getGeneID())
1334+
fprintf(fout, " ref_gene_id \"%s\";", t.getGeneID());
1335+
}
1336+
fprintf(fout, "\n");
1337+
}
1338+
}
1339+
if (gout!=NULL) {
1340+
//gather coords for this gene_id
1341+
char* geneid=t.getGeneID();
1342+
if (geneid==NULL) geneid=t.getGeneName();
1343+
if (geneid!=NULL) {
1344+
CGene* gloc=geneabs.Find(geneid);
1345+
if (gloc) {
1346+
if (gloc->strand!=t.strand)
1347+
GMessage("Warning: gene \"%s\" (on %s) has reference transcripts on both strands?\n",
1348+
geneid, crefd.gseq_name);
1349+
if (t.start<gloc->start) gloc->start=t.start;
1350+
if (t.end>gloc->end) gloc->end=t.end;
1351+
} else {
1352+
//add new geneid locus
1353+
geneabs.Add(geneid, new CGene(t.start, t.end, t.strand, t.getGeneID(), t.getGeneName()));
1354+
}
1355+
}
1356+
if (m==crefd.rnas.Count()-1) {
1357+
//write unbundled genes from this chromosome
1358+
geneabs.startIterate();
1359+
while (CGene* g=geneabs.NextData()) {
1360+
fprintf(gout, "%s\t%s\t%s\t%c\t%d\t%d\t%d\t0.0\t0.0\t0.0\n",
1361+
g->geneID, g->geneName, crefd.gseq_name,
1362+
g->strand, g->start, g->end, g->len());
1363+
}
1364+
}
1365+
} //if geneabundance
13001366
}
13011367
}
13021368
}

0 commit comments

Comments
 (0)