@@ -49,8 +49,8 @@ struct RC_TSeg {
4949*/
5050
5151struct RC_Feature { // exon or intron of a reference transcript
52- uint id; // feature id (>0) only used with -B (full Ballgown data)
53- // it's the index in the global refguides_RC_exons /introns array + 1
52+ uint id; // feature id (>0), +1 to the index either in global guides_RC_exons/introns if ballgown,
53+ // or in bundle_RC_exons /introns if not ballgown
5454 GVec<uint> t_ids; // transcripts owning this feature
5555 // if -B, this is the index in the global refguides_RC_Data array + 1
5656 // otherwise it is the index in the BundleData::keepguides array + 1
@@ -128,6 +128,25 @@ struct RC_Feature { //exon or intron of a reference transcript
128128 }
129129};
130130
131+ // for locus tracking and coverage, keep merged exons/introns in the locus
132+ struct GSegInfo {
133+ int t_id; // index of RC_TData in the guides_RC_data table + 1
134+ int exonum; // exon number
135+ GSegInfo (int tid=-1 , int en=-1 ):t_id(tid), exonum(en) { }
136+ };
137+ class GMSeg :public GSeg {
138+ public:
139+ GVec<GSegInfo> msegs; // keep track of exons contributing to this merged exon
140+ GMSeg (int l=0 , int r=0 , int tid=-1 , int eno=-1 ):GSeg(l,r), msegs(tid, eno) {
141+ }
142+ };
143+
144+ // reference locus - formed by exon-overlapping transcripts
145+ class GRefLocus :public GSeg {
146+ GArray<GMSeg> mexons; // merged exons in this locus (from any transcript)
147+ GPVec<GffObj> rnas; // transcripts in this locus
148+ };
149+
131150struct RC_ExonOvl {
132151 RC_Feature* feature; // pointer to an item of RC_BundleData::g_exons
133152 int mate_ovl; // = 1 if the mate of this read overlaps the same exon
@@ -167,27 +186,23 @@ struct RC_TData { //storing RC data for a transcript
167186 // only used with -B (full Ballgown data)
168187 GffObj* ref_t ;
169188 uint t_id;
170- // GStr t_name; //original GFF ID for the transcript
171189 int l;
172190 int r;
173- // char strand;
174- // int num_exons;
191+ char in_bundle; // 1 if used by read bundles, 2 if it has any direct read overlap
192+ GRefLocus* locus; // pointer to a locus info
175193 int eff_len;
176194 double cov;
177195 double fpkm;
178196 // char strand;
179197 GPVec<RC_Feature> t_exons;
180198 GPVec<RC_Feature> t_introns;
181- // int e_idx_cache;
182- // int i_idx_cache;
183199 void rc_addFeatures (uint& c_e_id, GList<RC_Feature>& fexons, GPVec<RC_Feature>& edata,
184200 uint& c_i_id, GList<RC_Feature>& fintrons, GPVec<RC_Feature>& idata);
185201 void addFeature (int fl, int fr, GPVec<RC_Feature>& fvec, uint& f_id,
186202 GList<RC_Feature>& fset, GPVec<RC_Feature>& fdata,
187203 int & cache_idx);
188- RC_TData (GffObj& s, uint id=0 ):ref_t (&s), t_id(id), l(s.start), r(s.end), // t_name(s.getID()),
189- // num_exons(s.exons.Count()),
190- eff_len (s.covlen), cov(0 ), fpkm(0 ), // strand(s.strand),
204+ RC_TData (GffObj& s, uint id=0 ):ref_t (&s), t_id(id), l(s.start), r(s.end),
205+ in_bundle (0 ), locus(NULL ), eff_len(s.covlen), cov(0 ), fpkm(0 ), // strand(s.strand),
191206 t_exons(false ), t_introns(false ) { // , e_idx_cache(-1), i_idx_cache(-1) {
192207 }
193208
@@ -196,7 +211,6 @@ struct RC_TData { //storing RC data for a transcript
196211 if (r != o.r ) return (r < o.r );
197212 if (char c=(ref_t ->strand - o.ref_t ->strand )) return (c<0 );
198213 return (strcmp (ref_t ->getID (), o.ref_t ->getID ())<0 );
199- // return false;
200214 }
201215 bool operator ==(const RC_TData& o) const {
202216 if (t_id!=0 && o.t_id !=0 ) return (t_id==o.t_id );
@@ -236,8 +250,8 @@ struct RC_BundleData {
236250 int rmax;
237251 GPVec<RC_TData> g_tdata; // raw counting data for all transcripts in this bundle
238252 // RC_TData::t_id-1 = the element index in this array
239- GList<RC_Feature> g_exons; // set of guide exons in this bundle, sorted by their start coordinate
240- GList<RC_Feature> g_introns; // set of guide introns in this bundle, sorted by their start coordinate
253+ GList<RC_Feature> g_exons; // set of guide exons in this bundle, sorted by start coordinate
254+ GList<RC_Feature> g_introns; // set of guide introns in this bundle, sorted by start coordinate
241255 // RC_FeatIt xcache; //cache the first exon overlapping xcache_pos to speed up exon-overlap queries (findOvlExons())
242256 int xcache; // exons index of the first exon overlapping xcache_pos
243257 int xcache_pos; // left coordinate of last cached exon overlap query (findOvlExons())
@@ -250,10 +264,11 @@ struct RC_BundleData {
250264
251265 // -- when no global Ballgown data is to be generated, these are
252266 // local (bundle) stable order tables of guide features
253- GPVec<RC_TData>* guides_RC_tdata ; // just a pointer to the current RC tdata table
267+ GPVec<RC_TData>* bundle_RC_tdata ; // pointer to the global RC tdata table
254268 // RC_Feature::id-1 = the index in these arrays:
255- GPVec<RC_Feature>* guides_RC_exons;
256- GPVec<RC_Feature>* guides_RC_introns;
269+ GPVec<RC_Feature>* bundle_RC_exons; // pointers to global (if ballgown)
270+ GPVec<RC_Feature>* bundle_RC_introns;// OR local exon/intron RC data
271+ // local exon/intron ids within the bundle
257272 uint c_exon_id;
258273 uint c_intron_id;
259274 // --
@@ -263,16 +278,18 @@ struct RC_BundleData {
263278 // features:(sorted, free, unique)
264279 g_exons (true , false , true ), g_introns(true , false , true ),
265280 xcache (0 ), xcache_pos(0 ),
266- guides_RC_tdata (rc_tdata), guides_RC_exons (rc_exons), guides_RC_introns (rc_introns),
281+ bundle_RC_tdata (rc_tdata), bundle_RC_exons (rc_exons), bundle_RC_introns (rc_introns),
267282 c_exon_id (0 ), c_intron_id(0 )
268283 {
269284 if (ballgown) {
270285 if (rmax>lmin) updateCovSpan ();
271286 }else {
272- g_tdata.setFreeItem (true );
273- guides_RC_tdata = &g_tdata;
274- guides_RC_exons = new GPVec<RC_Feature>(true );
275- guides_RC_introns= new GPVec<RC_Feature>(true );
287+ // g_tdata.setFreeItem(true);
288+ // guides_RC_tdata = &g_tdata;
289+ // -- override the passed rc_exons/rc_introns if not ballgown
290+ // -- because these are now locally maintained so they'll be deallocated with the bundle
291+ bundle_RC_exons = new GPVec<RC_Feature>(true );
292+ bundle_RC_introns= new GPVec<RC_Feature>(true );
276293 }
277294 }
278295
@@ -282,72 +299,46 @@ struct RC_BundleData {
282299 r_cov.clear ();
283300 r_mcov.clear ();
284301 if (!ballgown) {
285- delete guides_RC_exons ;
286- delete guides_RC_introns ;
302+ delete bundle_RC_exons ;
303+ delete bundle_RC_introns ;
287304 }
288305 }
289306
290- // for non-ballgown tracking of guide features
291- /*
292- void addTranscriptFeature(uint fstart, uint fend, char fstrand, GList<RC_Feature>& flist, GffObj& t) {
293- uint tidx=(uint)t.udata; //in non-Ballgown case, this is the index in the BundleData::keepguides array + 1
294- RC_Feature* feat=new RC_Feature(fstart, fend, fstrand, 0, tidx);
295- int fidx=-1;
296- RC_Feature* p=flist.AddIfNew(feat, true, &fidx);
297- if (p==feat) {//exon never seen before
298- //TODO: use some stable e_id index here and assign it to t.exons[eidx]->uptr->e_id (?) for quick retrieval
299- } else {//exon seen before, just update t_id list
300- p->t_ids.Add(tidx);
301- }
302- }
303- */
304- uint addTranscript (GffObj& t) { // shouuld return the guide index in *guides_RC_tdata
305- // if (!ps.rc_id_data()) return;
306- // RC_ScaffIds& sdata = *(ps.rc_id_data());
307- bool boundary_changed=false ;
308- if (lmin==0 || lmin>(int )t.start ) { lmin=t.start ; boundary_changed=true ; }
309- if (rmax==0 || rmax<(int )t.end ) { rmax=t.end ; boundary_changed=true ; }
310- RC_TData* tdata=NULL ;
307+ uint addTranscript (GffObj& t) { // should return the guide index in *guides_RC_tdata
308+ bool boundary_changed=false ;
309+ if (lmin==0 || lmin>(int )t.start ) { lmin=t.start ; boundary_changed=true ; }
310+ if (rmax==0 || rmax<(int )t.end ) { rmax=t.end ; boundary_changed=true ; }
311+ GASSERT (t.uptr ); // we should always have a RC_TData for each guide
312+ RC_TData* tdata=(RC_TData*)(t.uptr );
313+ /* RC_TData* tdata=NULL;
311314 if (ballgown) {
312- tdata=(RC_TData*)(t.uptr );
315+ tdata=(RC_TData*)(t.uptr);
313316 }
314317 else {
315- // int c_tid=g_tdata.Count();
316- // add RC transcript data locally for the bundle
317- tdata=new RC_TData (t, g_tdata.Count ()+1 );
318- t.uptr =tdata;
319- // guides_RC_Data.Add(tdata);
320- tdata->rc_addFeatures (c_exon_id, g_exons, *guides_RC_exons,
321- c_intron_id, g_introns, *guides_RC_introns);
322- }
323- // if (ballgown) { //ballgown case
324- // RC_TData& sdata=*(RC_TData*)(t.uptr);
325- g_tdata.Add (tdata);
326- if (ballgown) {
327- if (boundary_changed) updateCovSpan ();
328- // need to add exons and introns because rc_addFeatures()
329- // was NOT using g_exons and g_introns as unique sets if ballgown
330- for (int i=0 ;i<tdata->t_exons .Count ();i++) {
331- g_exons.Add (tdata->t_exons [i]);
332- }
333- for (int i=0 ;i<tdata->t_introns .Count ();i++) {
334- g_introns.Add (tdata->t_introns [i]);
335- }
318+ //add RC transcript data locally for the bundle
319+ tdata=new RC_TData(t, g_tdata.Count()+1);
320+ t.uptr=tdata;
321+ //guides_RC_Data.Add(tdata);
322+ tdata->rc_addFeatures(c_exon_id, g_exons, *guides_RC_exons,
323+ c_intron_id, g_introns, *guides_RC_introns);
324+ }*/
325+ g_tdata.Add (tdata);
326+ if (ballgown) {
327+ if (boundary_changed) updateCovSpan ();
328+ // rc_addFeatures() called before, but we still need to add exons
329+ // and introns to the local sets: g_exons, g_introns
330+ for (int i=0 ;i<tdata->t_exons .Count ();i++) {
331+ g_exons.Add (tdata->t_exons [i]);
336332 }
337- // } //if ballgown
338- /*
339- else { //non-ballgown, regular storage of bundle guides data
340- //use GffObj::udata as tid, it's the index in bundledata::keepguides
341- for (int i=0;i<t.exons.Count();++i) {
342- addTranscriptFeature(t.exons[i]->start, t.exons[i]->end, t.strand, g_exons, t);
343- if (i>0) {
344- //add intron
345- addTranscriptFeature(t.exons[i-1]->end+1, t.exons[i]->start-1, t.strand, g_introns, t);
346- }
333+ for (int i=0 ;i<tdata->t_introns .Count ();i++) {
334+ g_introns.Add (tdata->t_introns [i]);
347335 }
348- } //no ballgown coverage
349- */
350- return tdata->t_id ;
336+ }
337+ else {
338+ tdata->rc_addFeatures (c_exon_id, g_exons, *bundle_RC_exons,
339+ c_intron_id, g_introns, *bundle_RC_introns);
340+ }
341+ return tdata->t_id ;
351342 }
352343
353344 void updateCovSpan () {
0 commit comments