Skip to content

Commit 29d865b

Browse files
committed
fixing e2t.ctab not listing multiple t_id; a few other checks added
1 parent 1bb63a2 commit 29d865b

4 files changed

Lines changed: 75 additions & 25 deletions

File tree

rlink.cpp

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8443,15 +8443,25 @@ int print_signcluster(char strand,GList<CPrediction>& pred,GVec<int>& genes,GVec
84438443
fprintf(f_out,"%s\tStringTie\ttranscript\t%d\t%d\t1000\t%c\t.\tgene_id \"%s.%d\"; transcript_id \"%s.%d.%d\"; ",
84448444
refname.chars(),pred[n]->start,pred[n]->end,pred[n]->strand,label.chars(),genes[pred[n]->geneno],
84458445
label.chars(),genes[pred[n]->geneno],transcripts[pred[n]->geneno]);
8446-
if(pred[n]->t_eq) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8447-
//if(pred[n]->id) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->id);
8446+
if(pred[n]->t_eq) {
8447+
fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8448+
if (pred[n]->t_eq->getGeneID())
8449+
fprintf(f_out,"ref_gene_id \"%s\"; ",pred[n]->t_eq->getGeneID());
8450+
if (pred[n]->t_eq->getGeneName())
8451+
fprintf(f_out,"ref_gene_name \"%s\"; ",pred[n]->t_eq->getGeneName());
8452+
}
84488453
fprintf(f_out,"cov \"%.6f\";\n",pred[n]->cov);
84498454
for(int j=0;j<pred[n]->exons.Count();j++) {
84508455
fprintf(f_out,"%s\tStringTie\texon\t%d\t%d\t1000\t%c\t.\tgene_id \"%s.%d\"; transcript_id \"%s.%d.%d\"; exon_number \"%d\"; ",
84518456
refname.chars(),pred[n]->exons[j].start,pred[n]->exons[j].end,pred[n]->strand,label.chars(),genes[pred[n]->geneno],
84528457
label.chars(),genes[pred[n]->geneno],transcripts[pred[n]->geneno],j+1); // maybe add exon coverage here
8453-
if(pred[n]->t_eq) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8454-
//if(pred[n]->id) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->id);
8458+
if(pred[n]->t_eq) {
8459+
fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8460+
if (pred[n]->t_eq->getGeneID())
8461+
fprintf(f_out,"ref_gene_id \"%s\"; ",pred[n]->t_eq->getGeneID());
8462+
if (pred[n]->t_eq->getGeneName())
8463+
fprintf(f_out,"ref_gene_name \"%s\"; ",pred[n]->t_eq->getGeneName());
8464+
}
84558465
fprintf(f_out,"cov \"%.6f\";\n",pred[n]->exoncov[j]);
84568466
}
84578467
}
@@ -8708,15 +8718,25 @@ int print_cluster(GPVec<CPrediction>& pred,GVec<int>& genes,GVec<int>& transcrip
87088718
fprintf(f_out,"%s\tStringTie\ttranscript\t%d\t%d\t1000\t%c\t.\tgene_id \"%s.%d\"; transcript_id \"%s.%d.%d\"; ",
87098719
refname.chars(),pred[n]->start,pred[n]->end,pred[n]->strand,label.chars(),genes[pred[n]->geneno],
87108720
label.chars(),genes[pred[n]->geneno],transcripts[pred[n]->geneno]);
8711-
if(pred[n]->t_eq) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8712-
//if(pred[n]->id) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->id);
8721+
if(pred[n]->t_eq) {
8722+
fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8723+
if (pred[n]->t_eq->getGeneID())
8724+
fprintf(f_out,"ref_gene_id \"%s\"; ",pred[n]->t_eq->getGeneID());
8725+
if (pred[n]->t_eq->getGeneName())
8726+
fprintf(f_out,"ref_gene_name \"%s\"; ",pred[n]->t_eq->getGeneName());
8727+
}
87138728
fprintf(f_out,"cov \"%.6f\";\n",pred[n]->cov);
87148729
for(int j=0;j<pred[n]->exons.Count();j++) {
87158730
fprintf(f_out,"%s\tStringTie\texon\t%d\t%d\t1000\t%c\t.\tgene_id \"%s.%d\"; transcript_id \"%s.%d.%d\"; exon_number \"%d\"; ",
87168731
refname.chars(),pred[n]->exons[j].start,pred[n]->exons[j].end,pred[n]->strand,label.chars(),genes[pred[n]->geneno],
87178732
label.chars(),genes[pred[n]->geneno],transcripts[pred[n]->geneno],j+1); // maybe add exon coverage here
8718-
if(pred[n]->t_eq) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8719-
//if(pred[n]->id) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->id);
8733+
if(pred[n]->t_eq) {
8734+
fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8735+
if (pred[n]->t_eq->getGeneID())
8736+
fprintf(f_out,"ref_gene_id \"%s\"; ",pred[n]->t_eq->getGeneID());
8737+
if (pred[n]->t_eq->getGeneName())
8738+
fprintf(f_out,"ref_gene_name \"%s\"; ",pred[n]->t_eq->getGeneName());
8739+
}
87208740
fprintf(f_out,"cov \"%.6f\";\n",pred[n]->exoncov[j]);
87218741
}
87228742
}
@@ -8826,15 +8846,25 @@ int print_cluster_inclusion(GPVec<CPrediction>& pred,GVec<int>& genes,GVec<int>&
88268846
fprintf(f_out,"%s\tStringTie\ttranscript\t%d\t%d\t1000\t%c\t.\tgene_id \"%s.%d\"; transcript_id \"%s.%d.%d\"; ",
88278847
refname.chars(),pred[n]->start,pred[n]->end,pred[n]->strand,label.chars(),genes[pred[n]->geneno],
88288848
label.chars(),genes[pred[n]->geneno],transcripts[pred[n]->geneno]);
8829-
if(pred[n]->t_eq) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8830-
//if(pred[n]->id) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->id);
8849+
if(pred[n]->t_eq) {
8850+
fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8851+
if (pred[n]->t_eq->getGeneID())
8852+
fprintf(f_out,"ref_gene_id \"%s\"; ",pred[n]->t_eq->getGeneID());
8853+
if (pred[n]->t_eq->getGeneName())
8854+
fprintf(f_out,"ref_gene_name \"%s\"; ",pred[n]->t_eq->getGeneName());
8855+
}
88318856
fprintf(f_out,"cov \"%.6f\";\n",pred[n]->cov);
88328857
for(int j=0;j<pred[n]->exons.Count();j++) {
88338858
fprintf(f_out,"%s\tStringTie\texon\t%d\t%d\t1000\t%c\t.\tgene_id \"%s.%d\"; transcript_id \"%s.%d.%d\"; exon_number \"%d\"; ",
88348859
refname.chars(),pred[n]->exons[j].start,pred[n]->exons[j].end,pred[n]->strand,label.chars(),genes[pred[n]->geneno],
88358860
label.chars(),genes[pred[n]->geneno],transcripts[pred[n]->geneno],j+1); // maybe add exon coverage here
8836-
if(pred[n]->t_eq) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8837-
//if(pred[n]->id) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->id);
8861+
if(pred[n]->t_eq) {
8862+
fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8863+
if (pred[n]->t_eq->getGeneID())
8864+
fprintf(f_out,"ref_gene_id \"%s\"; ",pred[n]->t_eq->getGeneID());
8865+
if (pred[n]->t_eq->getGeneName())
8866+
fprintf(f_out,"ref_gene_name \"%s\"; ",pred[n]->t_eq->getGeneName());
8867+
}
88388868
fprintf(f_out,"cov \"%.6f\";\n",pred[n]->exoncov[j]);
88398869
}
88408870
}
@@ -8911,15 +8941,25 @@ int print_transcript_signcluster(char strand,GList<CPrediction>& pred,GVec<int>&
89118941
fprintf(f_out,"%s\tStringTie\ttranscript\t%d\t%d\t1000\t%c\t.\tgene_id \"%s.%d\"; transcript_id \"%s.%d.%d\"; ",
89128942
refname.chars(),pred[n]->start,pred[n]->end,pred[n]->strand,label.chars(),genes[pred[n]->geneno],
89138943
label.chars(),genes[pred[n]->geneno],transcripts[pred[n]->geneno]);
8914-
if(pred[n]->t_eq) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8915-
//if(pred[n]->id) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->id);
8944+
if(pred[n]->t_eq) {
8945+
fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8946+
if (pred[n]->t_eq->getGeneID())
8947+
fprintf(f_out,"ref_gene_id \"%s\"; ",pred[n]->t_eq->getGeneID());
8948+
if (pred[n]->t_eq->getGeneName())
8949+
fprintf(f_out,"ref_gene_name \"%s\"; ",pred[n]->t_eq->getGeneName());
8950+
}
89168951
fprintf(f_out,"cov \"%.6f\";\n",pred[n]->cov);
89178952
for(int j=0;j<pred[n]->exons.Count();j++) {
89188953
fprintf(f_out,"%s\tStringTie\texon\t%d\t%d\t1000\t%c\t.\tgene_id \"%s.%d\"; transcript_id \"%s.%d.%d\"; exon_number \"%d\"; ",
89198954
refname.chars(),pred[n]->exons[j].start,pred[n]->exons[j].end,pred[n]->strand,label.chars(),genes[pred[n]->geneno],
89208955
label.chars(),genes[pred[n]->geneno],transcripts[pred[n]->geneno],j+1); // maybe add exon coverage here
8921-
if(pred[n]->t_eq) fprintf(f_out, "reference_id \"%s\"; ", pred[n]->t_eq->getID());
8922-
//if(pred[n]->id) fprintf(f_out,"reference_id \"%s\"; ",pred[n]->id);
8956+
if(pred[n]->t_eq) {
8957+
fprintf(f_out,"reference_id \"%s\"; ",pred[n]->t_eq->getID());
8958+
if (pred[n]->t_eq->getGeneID())
8959+
fprintf(f_out,"ref_gene_id \"%s\"; ",pred[n]->t_eq->getGeneID());
8960+
if (pred[n]->t_eq->getGeneName())
8961+
fprintf(f_out,"ref_gene_name \"%s\"; ",pred[n]->t_eq->getGeneName());
8962+
}
89238963
fprintf(f_out,"cov \"%.6f\";\n",pred[n]->exoncov[j]);
89248964
}
89258965
}

stringtie.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ if (ballgown)
546546
if(!i) {
547547
//linebuf[strlen(line)-1]='\0';
548548
fprintf(f_out,"%s",linebuf);
549-
fprintf(f_out,"FPKM \"%.6f\";",calc_fpkm);
549+
fprintf(f_out," FPKM \"%.6f\";",calc_fpkm);
550550
//fprintf(f_out,"FPKM \"%.6f\"; calculated_FPKM \"%.6f\";",tcov*1000000000/Frag_Len,fpkm*1000000000/(Num_Fragments*tlen));
551551
//fprintf(f_out,"flen \"%.6f\"; FPKM \"%.6f\";",fpkm,fpkm*1000000000/Num_Fragments);
552552
fprintf(f_out,"\n");
@@ -638,7 +638,12 @@ GStr Process_Options(GArgs* args) {
638638
s=args->getOpt('j');
639639
if (!s.is_empty()) junctionthr=s.asInt();
640640
s=args->getOpt('c');
641-
if (!s.is_empty()) readthr=(float)s.asDouble();
641+
if (!s.is_empty()) {
642+
readthr=(float)s.asDouble();
643+
if (readthr<0.001) {
644+
GError("Error: invalid -c value, must be >=0.001)\n");
645+
}
646+
}
642647
s=args->getOpt('l');
643648
if (!s.is_empty()) label=s;
644649
s=args->getOpt('f');

tablemaker.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ void rc_write_RCfeature( GPVec<RC_ScaffData>& rcdata, GPVec<RC_Feature>& feature
250250
bool is_exon=false) {
251251
for (int i=0;i<features.Count();++i) {
252252
RC_Feature& f=*(features[i]);
253-
const char* ref_name=rcdata[f.t_id-1]->scaff->getGSeqName();
253+
const char* ref_name=rcdata[f.t_ids[0]-1]->scaff->getGSeqName();
254254
if (is_exon) {
255255
fprintf(fdata, "%u\t%s\t%c\t%d\t%d\t%d\t%d\t%.2f\t%.4f\t%.4f\t%.4f\t%.4f\n",
256256
f.id, ref_name, f.strand, f.l, f.r, f.rcount,
@@ -261,7 +261,8 @@ void rc_write_RCfeature( GPVec<RC_ScaffData>& rcdata, GPVec<RC_Feature>& feature
261261
f.strand, f.l, f.r, f.rcount, f.ucount, f.mrcount);
262262
}
263263
// f2t -------
264-
fprintf(f2t, "%u\t%u\n", f.id, f.t_id);
264+
for (int t=0;t<f.t_ids.Count();++t)
265+
fprintf(f2t, "%u\t%u\n", f.id, f.t_ids[t]);
265266
} //for each feature
266267
fclose(fdata);
267268
fclose(f2t);
@@ -345,7 +346,7 @@ void RC_ScaffData::addFeature(int fl, int fr, GPVec<RC_Feature>& fvec,
345346
}
346347
}
347348
}
348-
if (add_new) {
349+
if (add_new) { //never seen this feature before
349350
newseg.id = ++f_id;
350351
pair< set<RC_ScaffSeg>::iterator, bool> ret = fset.insert(newseg);
351352
//ret.second = was_inserted (indeed new)
@@ -362,6 +363,9 @@ void RC_ScaffData::addFeature(int fl, int fr, GPVec<RC_Feature>& fvec,
362363
#endif
363364
GASSERT((uint)fdata.Count()==f_id);
364365
}
366+
else { //feature seen before, update its parent list
367+
fdata[newseg.id-1]->t_ids.Add(this->t_id);
368+
}
365369
//fvec.push_back(newseg);
366370
GASSERT(fdata[newseg.id-1]->id==newseg.id);
367371
fvec.Add(fdata[newseg.id-1]);

tablemaker.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ struct RC_ScaffSeg {
4848

4949
struct RC_Feature { //exon or intron of a reference transcript
5050
uint id; //feature id (>0)
51-
uint t_id; //transcript id;
51+
GVec<uint> t_ids; //transcripts owning this feature
5252
int l; int r; //genomic coordinates
5353
char strand;
5454
mutable uint rcount; //# reads covering this feature
@@ -66,17 +66,18 @@ struct RC_Feature { //exon or intron of a reference transcript
6666
}
6767
};
6868

69-
RC_Feature(int l0=0, int r0=0, char s='.', uint fid=0, uint tid=0): id(fid), t_id(tid), l(l0), r(r0),
69+
RC_Feature(int l0=0, int r0=0, char s='.', uint fid=0, uint tid=0): id(fid), t_ids(1), l(l0), r(r0),
7070
strand(s), rcount(0),ucount(0),mrcount(0), avg(0), stdev(0), mavg(0), mstdev(0) {
7171
if (l>r) { int t=l; l=r; r=t; }
72+
if (tid>0) t_ids.Add(tid);
7273
}
7374

74-
RC_Feature(RC_ScaffSeg& seg, uint tid=0): id(seg.id), t_id(tid), l(seg.l), r(seg.r),
75+
RC_Feature(RC_ScaffSeg& seg, uint tid=0): id(seg.id), t_ids(1), l(seg.l), r(seg.r),
7576
strand(seg.strand), rcount(0),ucount(0),mrcount(0), avg(0), stdev(0), mavg(0), mstdev(0) {
7677
if (l>r) { int t=l; l=r; r=t; }
78+
if (tid>0) t_ids.Add(tid);
7779
}
7880

79-
8081
bool operator<(const RC_Feature& o) const {
8182
//if (id == o.id) return false;
8283
if (l != o.l) return (l < o.l);

0 commit comments

Comments
 (0)