Skip to content

Commit 849e912

Browse files
committed
finally found the GBitVec error cause, fixed
1 parent 8d27659 commit 849e912

2 files changed

Lines changed: 82 additions & 32 deletions

File tree

rlink.cpp

Lines changed: 78 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,11 @@ void processRead(int currentstart, int currentend, BundleData& bdata,
259259
bdata.end=currentend;
260260
}
261261

262-
float rdcount=float(1)/nh;
263-
if(nomulti) rdcount=1;
262+
float rdcount=1;
263+
float unitig_cov=brec.tag_float("Yk");
264+
if(unitig_cov) rdcount=unitig_cov;
265+
266+
if(!nomulti) rdcount/=nh;
264267
readlist[n]->read_count+=rdcount; // increase single count just in case I don't see the pair
265268

266269
// store the mismatch count per junction so that I can eliminate it later
@@ -1021,11 +1024,12 @@ float compute_cost(GVec<float>& wincov,float sumleft,float sumright,int leftstar
10211024

10221025
cost=basecost/len-leftcost/leftlen-rightcost/rightlen;
10231026

1027+
/*
10241028
fprintf(stderr,"leftlen=%d leftsum=%f leftcost=%f muleft=%f rightlen=%d righttsum=%f rightcost=%f muright=%f baselen=%d basesum=%f basecost=%f mubase=%f\n",
10251029
leftlen,sumleft,leftcost/leftlen,muleft,
10261030
rightlen,sumright,rightcost/rightlen,muright,
10271031
len,sumleft+sumright,basecost/len,mubase);
1028-
1032+
*/
10291033
return(cost);
10301034
}
10311035

@@ -1640,6 +1644,8 @@ CGraphnode *source2guide(int s, int g, int refstart,uint newstart,uint newend, C
16401644
GVec<float>* bpcov,GVec<float>& futuretr, int& graphno,CBundlenode *bundlenode,GVec<CGraphinfo> **bundle2graph,
16411645
GPVec<CGraphnode> **no2gnode, int &edgeno) {
16421646

1647+
//fprintf(stderr,"source2guide for newstart=%d and newend=%d\n",newstart,newend);
1648+
16431649
// compute maxabund
16441650
float leftcov=0;
16451651
float rightcov=0;
@@ -1681,6 +1687,8 @@ CGraphnode *guide2sink(int s, int g, int refstart,uint newstart,uint newend, CGr
16811687
GVec<float>* bpcov,GVec<float>& futuretr, int& graphno,CBundlenode *bundlenode,GVec<CGraphinfo> **bundle2graph,
16821688
GPVec<CGraphnode> **no2gnode, int &edgeno) {
16831689

1690+
//fprintf(stderr,"guide2sink for newstart=%d and newend=%d\n",newstart,newend);
1691+
16841692
// compute maxabund
16851693
float leftcov=0;
16861694
float rightcov=0;
@@ -1939,7 +1947,6 @@ GBitVec traverse_dfs(int s,int g,CGraphnode *node,CGraphnode *sink,GBitVec paren
19391947
}
19401948
else {
19411949
gpos[s][g].Add(key,lastgpos);
1942-
//fprintf(stderr,"s=%d g=%d key=%d lastgpos=%d add edge between %d and %d child\n",s,g,key,lastgpos,min, max);
19431950
childparents[lastgpos]=1;
19441951
node->childpat[lastgpos]=1;
19451952
lastgpos++;
@@ -2514,9 +2521,11 @@ CTreePat *construct_treepat(int gno, GIntHash<int>& gpos,GPVec<CTransfrag>& tran
25142521
void print_pattern(CTreePat *tree,GStr& pattern,int gno) {
25152522
if(!tree) return;
25162523
pattern+=tree->nodeno;
2524+
25172525
if(tree->tr) {
25182526
fprintf(stderr,"pat: %s %f\n",pattern.chars(),tree->tr->abundance);
25192527
}
2528+
25202529
for(int i=0;i<tree->childno;i++)
25212530
if(tree->nextpat[i]) {
25222531
GStr tmppat(pattern.chars());
@@ -3442,7 +3451,7 @@ CPrediction* store_merge_prediction(GVec<int>& alltr,GPVec<CMTransfrag>& mgt,GVe
34423451
for(int i=0;i<alltr.Count();i++) {
34433452
int a=alltr[i];
34443453
cov+=mgt[alltr[i]]->transfrag->abundance;
3445-
fprintf(stderr,"Add transfrag %d(%d %d) with cov=%f\n",alltr[i],mgt[alltr[i]]->nf,mgt[alltr[i]]->nl-mgt[alltr[i]]->transfrag->nodes.Count()+1,mgt[alltr[i]]->transfrag->abundance);
3454+
//fprintf(stderr,"Add transfrag %d(%d %d) with cov=%f\n",alltr[i],mgt[alltr[i]]->nf,mgt[alltr[i]]->nl-mgt[alltr[i]]->transfrag->nodes.Count()+1,mgt[alltr[i]]->transfrag->abundance);
34463455
mgt[alltr[i]]->transfrag->abundance=0;
34473456
if(enableNames || (alltr.Count()==1 && mgt[alltr[0]]->transfrag->real)) for(int j=0;j<mgt[a]->read.Count();j++) {
34483457
int r=mgt[a]->read[j];
@@ -7050,9 +7059,11 @@ float max_flow(int gno,GVec<int>& path,GBitVec& istranscript,GPVec<CTransfrag>&
70507059
}
70517060
}
70527061

7062+
/*
70537063
fprintf(stderr,"Used transcripts:");
70547064
for(int i=0;i<transfrag.Count();i++) if(istranscript[i]) fprintf(stderr," %d(%f)",i,transfrag[i]->abundance);
70557065
fprintf(stderr,"\n");
7066+
*/
70567067

70577068
for(int i=0;i<n;i++) link[i].Sort();
70587069

@@ -7112,7 +7123,7 @@ float max_flow(int gno,GVec<int>& path,GBitVec& istranscript,GPVec<CTransfrag>&
71127123
if(flow[n1][n2]>0) {
71137124
if(flow[n1][n2]<transfrag[t]->abundance) {
71147125
if(!i) sumout+=flow[n1][n2];
7115-
fprintf(stderr,"Update capacity of transfrag[%d] with value %f to %f\n",t,transfrag[t]->abundance, transfrag[t]->abundance-flow[n1][n2]);
7126+
//fprintf(stderr,"Update capacity of transfrag[%d] with value %f to %f\n",t,transfrag[t]->abundance, transfrag[t]->abundance-flow[n1][n2]);
71167127
update_capacity(0,transfrag[t],flow[n1][n2],nodecapacity,node2path);
71177128
//if(path[i] && transfrag[t]->nodes.Last()!=gno-1) fragno+=flow[n1][n2];
71187129
flow[n1][n2]=0;
@@ -7121,7 +7132,7 @@ float max_flow(int gno,GVec<int>& path,GBitVec& istranscript,GPVec<CTransfrag>&
71217132
if(!i) sumout+=transfrag[t]->abundance;
71227133
flow[n1][n2]-=transfrag[t]->abundance;
71237134
//if(path[i] && transfrag[t]->nodes.Last()!=gno-1) fragno+=transfrag[t]->abundance;
7124-
fprintf(stderr,"Update capacity of transfrag[%d] with value=%f to 0\n",t,transfrag[t]->abundance);
7135+
//fprintf(stderr,"Update capacity of transfrag[%d] with value=%f to 0\n",t,transfrag[t]->abundance);
71257136
update_capacity(0,transfrag[t],transfrag[t]->abundance,nodecapacity,node2path);
71267137
}
71277138
}
@@ -9233,6 +9244,7 @@ float store_transcript(GList<CPrediction>& pred,GVec<int>& path,GVec<float>& nod
92339244
if (t && t->uptr) {
92349245
RC_TData &td = *(RC_TData*) (t->uptr);
92359246
td.in_bundle=3;
9247+
//fprintf(stderr,"st guide %s is stored\n",t->getID());
92369248
}
92379249

92389250
/*
@@ -9327,6 +9339,7 @@ int store_guide_transcript(GList<CPrediction>& pred,GVec<int>& path,GVec<float>&
93279339
if (t && t->uptr) {
93289340
RC_TData &td = *(RC_TData*) (t->uptr);
93299341
td.in_bundle=3;
9342+
//fprintf(stderr,"sg guide %s is stored\n",t->getID());
93309343
}
93319344

93329345
update_guide_pred(pred,np,path,nodeflux,nodecov,no2gnode,gno,update);
@@ -9635,7 +9648,7 @@ void process_refguides(int gno,int edgeno,GIntHash<int>& gpos,int& lastgpos,GPVe
96359648

96369649
for(int g=0;g<guides.Count();g++) {
96379650
//fprintf(stderr,"Consider guide[%d out of %d] %s\n",g,guides.Count(),guides[g]->getID());
9638-
if((guides[g]->strand==strand) && ((RC_TData*)(guides[g]->uptr))->in_bundle==2 && (guides[g]->overlap(no2gnode[1]->start,no2gnode[gno-2]->end))) {
9651+
if((guides[g]->strand==strand || guides[g]->strand=='.') && ((RC_TData*)(guides[g]->uptr))->in_bundle>=2 && (guides[g]->overlap(no2gnode[1]->start,no2gnode[gno-2]->end))) {
96399652
CTransfrag *trguide=find_guide_pat(guides[g],no2gnode,gno,edgeno,gpos);
96409653
if(trguide) { // the guide can be found among the graph nodes
96419654
//CGuide newguide(trguide,guides[g]);
@@ -9647,9 +9660,9 @@ void process_refguides(int gno,int edgeno,GIntHash<int>& gpos,int& lastgpos,GPVe
96479660
fprintf(stderr,"Added guidetrf %d with ID=%s overlapping transcript interval %d - %d with nodes:",g,guides[g]->getID(),no2gnode[1]->start,no2gnode[gno-2]->end);
96489661
for(int i=0;i<trguide->nodes.Count();i++) fprintf(stderr," %d",trguide->nodes[i]);
96499662
fprintf(stderr,"\n");
9650-
fprintf(stderr,"s=%d strand = %c trguide[%d]=",s,strand,g);
9651-
printBitVec(trguide->pattern);
9652-
fprintf(stderr,"\n");
9663+
//fprintf(stderr,"s=%d strand = %c trguide[%d]=",s,strand,g);
9664+
//printBitVec(trguide->pattern);
9665+
//fprintf(stderr,"\n");
96539666
}
96549667
*/
96559668
}
@@ -9700,6 +9713,7 @@ void process_refguides(int gno,int edgeno,GIntHash<int>& gpos,int& lastgpos,GPVe
97009713

97019714
int *pos=gpos[edge(0,nodei,gno)];
97029715
if(!pos) { // here is where I need lastgpos
9716+
//fprintf(stderr,"Add source link to position %d for guide=%d\n",no2gnode[nodei]->start,guidetrf[g].g);
97039717
int key=edge(0,nodei,gno);
97049718
gpos.Add(key,lastgpos);
97059719
lastgpos++;
@@ -9774,6 +9788,7 @@ void process_refguides(int gno,int edgeno,GIntHash<int>& gpos,int& lastgpos,GPVe
97749788
guidetrf[g].trf->pattern[sink]=1;
97759789
int *pos=gpos[edge(nodei,sink,gno)];
97769790
if(!pos) {
9791+
//fprintf(stderr,"Add sink link from position %d for guide=%d\n",no2gnode[nodei]->end,guidetrf[g].g);
97779792
int key=edge(nodei,sink,gno);
97789793
gpos.Add(key,lastgpos);
97799794
lastgpos++;
@@ -10785,11 +10800,6 @@ int guides_pushmaxflow(int gno,int edgeno,GIntHash<int>& gpos,GPVec<CGraphnode>&
1078510800
bool include=true;
1078610801
if(guidepred[guidetrf[0].g]==-1) {
1078710802

10788-
if(!strcmp(guides[guidetrf[0].g]->getID(),"rna51274")) {
10789-
fprintf(stderr,"nodecov[1]=%f\n",nodecov[guidetrf[0].trf->nodes[1]]);
10790-
exit(0);
10791-
}
10792-
1079310803
store_transcript(pred,guidetrf[0].trf->nodes,nodeflux,nodecov,no2gnode,geneno,first,s,gno,gpos,include,pathpat,bdata,guides[guidetrf[0].g]);
1079410804
//if(eonly) { // this is not correct because it might have been assigned before
1079510805
guidepred[guidetrf[0].g]=pred.Count()-1; // NEED TO TEST: if this doesn't work for single genes I might want to recombine with the previous prediction in store_transcript
@@ -10856,10 +10866,6 @@ int guides_pushmaxflow(int gno,int edgeno,GIntHash<int>& gpos,GPVec<CGraphnode>&
1085610866
bool include=true;
1085710867
if(flux>epsilon) {
1085810868
if(guidepred[guidetrf[g].g]==-1) {
10859-
if(!strcmp(guides[guidetrf[g].g]->getID(),"rna51274")) {
10860-
fprintf(stderr,"nodecov[1]=%f\n",nodecov[guidetrf[g].trf->nodes[1]]);
10861-
exit(0);
10862-
}
1086310869
store_transcript(pred,guidetrf[g].trf->nodes,nodeflux,nodecov,no2gnode,geneno,first,s,gno,gpos,include,pathpat,bdata,guides[guidetrf[g].g]);
1086410870
//if(eonly) {
1086510871
guidepred[guidetrf[g].g]=pred.Count()-1; // NEED TO TEST: if this doesn't work for single genes I might want to recombine with the previous prediction in store_transcript
@@ -12228,16 +12234,36 @@ int build_graphs(BundleData* bdata) {
1222812234
}
1222912235
if(covered) {
1223012236
tdata->in_bundle=2;
12231-
int s=0;
12237+
int s=-1; // unknown strand
1223212238
if(guides[g]->strand=='+') s=1; // guide on positive strand
12233-
GEdge ge1(guides[g]->start,guides[g]->exons[0]->end,s);
12234-
int idx=guideedge.IndexOf(ge1);
12235-
if(idx<0) guideedge.Add(ge1);
12239+
else if(guides[g]->strand=='-') s=0; // guide on negative strand
12240+
12241+
//fprintf(stderr,"Look to add guide g=%d start %d-%d and end %d-%d on strand %d\n",g,guides[g]->start,guides[g]->exons[0]->end,guides[g]->end,guides[g]->exons.Last()->start,s);
12242+
12243+
int uses=s;
12244+
if(s<0) uses=0;
12245+
GEdge ge(guides[g]->start,guides[g]->exons[0]->end,uses);
12246+
int idx=guideedge.IndexOf(ge);
12247+
if(idx<0) guideedge.Add(ge);
1223612248
else if(guideedge[idx].endval>guides[g]->exons[0]->end) guideedge[idx].endval=guides[g]->exons[0]->end;
12237-
GEdge ge2(guides[g]->end,guides[g]->exons.Last()->start,s);
12238-
idx=guideedge.IndexOf(ge2);
12239-
if(idx<0) guideedge.Add(ge2);
12249+
if(s<0) {
12250+
ge.strand=1;
12251+
idx=guideedge.IndexOf(ge);
12252+
if(idx<0) guideedge.Add(ge);
12253+
else if(guideedge[idx].endval>guides[g]->exons[0]->end) guideedge[idx].endval=guides[g]->exons[0]->end;
12254+
}
12255+
12256+
ge.val=guides[g]->end;
12257+
ge.endval=guides[g]->exons.Last()->start;
12258+
idx=guideedge.IndexOf(ge);
12259+
if(idx<0) guideedge.Add(ge);
1224012260
else if(guideedge[idx].endval<guides[g]->exons.Last()->start) guideedge[idx].endval=guides[g]->exons.Last()->start;
12261+
if(s<0) {
12262+
ge.strand=0; // ge.strand was set as 1 before
12263+
idx=guideedge.IndexOf(ge);
12264+
if(idx<0) guideedge.Add(ge);
12265+
else if(guideedge[idx].endval<guides[g]->exons.Last()->start) guideedge[idx].endval=guides[g]->exons.Last()->start;
12266+
}
1224112267
}
1224212268
}
1224312269
}
@@ -12873,6 +12899,7 @@ int build_graphs(BundleData* bdata) {
1287312899
p->exoncov.Add(gcov);
1287412900
pred.Add(p);
1287512901
printguides=true;
12902+
guidepred[g]=pred.Count()-1;
1287612903
}
1287712904
}
1287812905

@@ -12999,11 +13026,17 @@ int build_graphs(BundleData* bdata) {
1299913026
lastgpos[s].cAdd(0);
1300013027
// I am overestmating the edgeno below, hopefully not by too much
1300113028

13029+
//fprintf(stderr,"Bundle is: %d - %d start at g=%d\n",bnode[sno][bundle[sno][b]->startnode]->start,bnode[sno][bundle[sno][b]->lastnodeid]->end,g);
13030+
1300213031
while(g<ng && guides[g]->end<bnode[sno][bundle[sno][b]->startnode]->start) g++;
13032+
1300313033
int cg=g;
1300413034
int nolap=0;
1300513035
while(cg<ng && guides[cg]->start<=bnode[sno][bundle[sno][b]->lastnodeid]->end) { // this are potential guides that might overlap the current bundle, and they might introduce extra edges
13006-
if(guides[cg]->strand==strnd && ((RC_TData*)(guides[cg]->uptr))->in_bundle==2) {
13036+
13037+
//fprintf(stderr,"...consider guide cg=%d with strand=%c and in_bundle=%d\n",cg,guides[cg]->strand,((RC_TData*)(guides[cg]->uptr))->in_bundle);
13038+
if((guides[cg]->strand==strnd || guides[cg]->strand=='.') && ((RC_TData*)(guides[cg]->uptr))->in_bundle>=2) {
13039+
//fprintf(stderr,"Add guide g=%d with start=%d end=%d\n",cg,guides[cg]->start,guides[cg]->end);
1300713040
edgeno[s][b]+=2; // this is an overestimate: possibly I have both an extra source and an extra sink link
1300813041
nolap++;
1300913042
}
@@ -13012,13 +13045,14 @@ int build_graphs(BundleData* bdata) {
1301213045

1301313046
/*
1301413047
{ // DEBUG ONLY
13048+
fprintf(stderr,"edgeno[%d][%d]=%d\n",s,b,edgeno[s][b]);
1301513049
if(bundle[sno][b]->nread) {
1301613050
fprintf(stderr,"proc bundle[%d][%d] %f/%f is %f len=%d and %d guides\n",sno,b,
1301713051
bundle[sno][b]->multi,bundle[sno][b]->nread,(float)bundle[sno][b]->multi/bundle[sno][b]->nread,
1301813052
bundle[sno][b]->len,nolap);
1301913053
}
1302013054
}
13021-
*/
13055+
*/
1302213056

1302313057
// here I can add something in stringtie to lower the mintranscript len if there are guides?
1302413058

@@ -13138,6 +13172,21 @@ int build_graphs(BundleData* bdata) {
1313813172
// include source to guide starts links
1313913173
GVec<CGuide> guidetrf;
1314013174

13175+
/*
13176+
{ // DEBUG ONLY
13177+
fprintf(stderr,"process refguides for s=%d b=%d edgeno=%d gno=%d\n",s,b,edgeno[s][b],graphno[s][b]);
13178+
fprintf(stderr,"There are %d nodes for graph[%d][%d]:\n",graphno[s][b],s,b);
13179+
for(int i=0;i<graphno[s][b];i++) {
13180+
fprintf(stderr,"%d (%d-%d): %f len=%d cov=%f",i,no2gnode[s][b][i]->start,no2gnode[s][b][i]->end,no2gnode[s][b][i]->cov,no2gnode[s][b][i]->len(),no2gnode[s][b][i]->cov/no2gnode[s][b][i]->len());
13181+
fprintf(stderr," parents:");
13182+
for(int j=0;j<no2gnode[s][b][i]->parent.Count();j++) fprintf(stderr," %d",no2gnode[s][b][i]->parent[j]);
13183+
fprintf(stderr," trf=");
13184+
for(int j=0;j<no2gnode[s][b][i]->trf.Count();j++) fprintf(stderr," %d",no2gnode[s][b][i]->trf[j]);
13185+
fprintf(stderr,"\n");
13186+
}
13187+
}
13188+
*/
13189+
1314113190
if(guides.Count()) process_refguides(graphno[s][b],edgeno[s][b],gpos[s][b],lastgpos[s][b],no2gnode[s][b],transfrag[s][b],s,guides,guidetrf,bdata);
1314213191

1314313192
//process transfrags to eliminate noise, and set compatibilities, and node memberships

stringtie.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include "proc_mem.h"
1212
#endif
1313

14-
#define VERSION "1.3.1b"
14+
#define VERSION "1.3.1c"
1515

1616
//#define DEBUGPRINT 1
1717

@@ -752,8 +752,9 @@ if(!mergeMode) {
752752
while(fgetline(linebuf,linebuflen,ftmp_in)) {
753753
//sscanf(linebuf,"%d %d %d %g %g", &nl, &tlen, &t_id, &fpkm, &tcov);
754754
sscanf(linebuf,"%d %d %d %d %g", &istr, &nl, &tlen, &t_id, &tcov);
755-
//for the rare cases tcov < 0, invert it
756-
if (tcov<0) tcov=-tcov; //should not happen
755+
//for the rare cases tcov < 0, invert it ??
756+
//if (tcov<0) tcov=-tcov; //should not happen
757+
if (tcov<0) tcov=0;
757758
calc_fpkm=tcov*1000000000/Frag_Len;
758759
calc_tpm=tcov*1000000/Cov_Sum;
759760
if(istr) { // this is a transcript

0 commit comments

Comments
 (0)