Skip to content

Commit cf9bdac

Browse files
committed
fixed CRAM fidelity issue, --ref/cram-ref option added
1 parent b91d0f5 commit cf9bdac

5 files changed

Lines changed: 31 additions & 55 deletions

File tree

gclib/GSam.h

Lines changed: 13 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -254,51 +254,30 @@ class GSamReader {
254254
sam_hdr_t* hdr;
255255
bam1_t* b_next; //for light next(GBamRecord& b)
256256
public:
257-
void bopen(const char* filename, int32_t required_fields,
258-
const char* cram_refseq=NULL) {
257+
void bopen(const char* filename, const char* cram_refseq=NULL, int32_t cram_req_fields=0) {
259258
hts_file=hts_open(filename, "r");
260259
if (hts_file==NULL)
261260
GError("Error: could not open alignment file %s \n",filename);
262261
if (hts_file->is_cram) {
263262
if (cram_refseq!=NULL) {
264263
hts_set_opt(hts_file, CRAM_OPT_REFERENCE, cram_refseq);
265-
}
266-
if (required_fields==0) {
267-
required_fields=SAM_QNAME|SAM_FLAG|SAM_RNAME|SAM_POS|SAM_MAPQ|SAM_CIGAR|
268-
SAM_RNEXT|SAM_PNEXT|SAM_TLEN|SAM_AUX;
269-
}
270-
hts_set_opt(hts_file, CRAM_OPT_REQUIRED_FIELDS,
271-
required_fields);
264+
hts_set_opt(hts_file, CRAM_OPT_DECODE_MD, 1);
265+
} else {
266+
if (cram_req_fields==0) {
267+
cram_req_fields=SAM_QNAME|SAM_FLAG|SAM_RNAME|SAM_POS|SAM_MAPQ|SAM_CIGAR|
268+
SAM_RNEXT|SAM_PNEXT|SAM_TLEN|SAM_AUX;
269+
}
270+
hts_set_opt(hts_file, CRAM_OPT_REQUIRED_FIELDS,
271+
cram_req_fields);
272+
}
272273
}
273274
fname=Gstrdup(filename);
274275
hdr=sam_hdr_read(hts_file);
275276
}
276277

277-
void bopen(const char* filename, const char* cram_refseq=NULL) {
278-
hts_file=hts_open(filename, "r");
279-
if (hts_file==NULL)
280-
GError("Error: could not open alignment file %s \n",filename);
281-
if (hts_file->is_cram) {
282-
if (cram_refseq!=NULL) {
283-
hts_set_opt(hts_file, CRAM_OPT_REFERENCE, cram_refseq);
284-
}
285-
else hts_set_opt(hts_file, CRAM_OPT_REQUIRED_FIELDS,
286-
SAM_QNAME|SAM_FLAG|SAM_RNAME|SAM_POS|SAM_MAPQ|SAM_CIGAR|
287-
SAM_RNEXT|SAM_PNEXT|SAM_TLEN|SAM_AUX );
288-
289-
}
290-
fname=Gstrdup(filename);
291-
hdr=sam_hdr_read(hts_file);
292-
}
293-
294-
GSamReader(const char* fn, int32_t required_fields,
295-
const char* cram_ref=NULL):hts_file(NULL),fname(NULL), hdr(NULL), b_next(NULL) {
296-
bopen(fn, required_fields, cram_ref);
297-
}
298-
299-
GSamReader(const char* fn, const char* cram_ref=NULL):hts_file(NULL),fname(NULL),
300-
hdr(NULL), b_next(NULL) {
301-
bopen(fn, cram_ref);
278+
GSamReader(const char* fn, const char* cram_ref=NULL,
279+
int32_t required_fields=0):hts_file(NULL),fname(NULL), hdr(NULL), b_next(NULL) {
280+
bopen(fn, cram_ref, required_fields);
302281
}
303282

304283
sam_hdr_t* header() {

rlink.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -207,11 +207,7 @@ bool exonmatch(GVec<GSeg> &prevexons, GVec<GSeg> &exons) {
207207
}
208208

209209
bool mismatch_anchor(CReadAln *rd,char *mdstr,int refstart, bam1_t *b) {
210-
//TODO DEBUG ONLY see if MD string influences anything!
211-
return false;
212-
//TODO
213-
if(mdstr==NULL) return false;
214-
210+
if (mdstr==NULL) return false;
215211
//--make a copy of the string, in case the original is a const string
216212
// (because the parseUInt() function modifies the string temporarily
217213
char* mdstring=Gstrdup(mdstr);

stringtie.cpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ Options:\n\
7676
-x do not assemble any transcripts on the given reference sequence(s)\n\
7777
-u no multi-mapping correction (default: correction enabled)\n\
7878
-h print this usage message and exit\n\
79+
--ref/--cram-ref reference FASTA file for CRAM input\n\
7980
\n\
8081
Transcript merge usage mode: \n\
8182
stringtie --merge [Options] { gtf_list | strg1.gtf ...}\n\
@@ -133,6 +134,7 @@ FILE* c_out=NULL;
133134
GStr outfname;
134135
GStr out_dir;
135136
GStr tmp_path;
137+
GStr cram_ref; //reference genome FASTA for CRAM input
136138
GStr tmpfname;
137139
GStr genefname;
138140
GStr traindir; // training directory for CDS option
@@ -278,7 +280,7 @@ int waitForData(BundleData* bundles);
278280

279281

280282

281-
#define DBG_ALN_DATA 1
283+
//#define DBG_ALN_DATA 1
282284
#ifdef DBG_ALN_DATA
283285
FILE* fdbgaln=NULL;
284286
void dbg_waln(GSamRecord* b) {
@@ -292,8 +294,9 @@ int waitForData(BundleData* bundles);
292294
}
293295
// gseqname, flags, readname, start, end, cigar, nh, hi
294296
char* pcigar=b->cigar();
295-
fprintf(fdbgaln, "%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\n", b->refName(), b->flags(),
296-
b->name(), b->start, b->end, pcigar, (uint)b->tag_int("NH"), (uint)b->tag_int("HI"));
297+
fprintf(fdbgaln, "%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", b->refName(), b->flags(),
298+
b->name(), b->start, b->end, pcigar, (uint)b->tag_int("NM"), (uint)b->tag_int("NH"), (uint)b->tag_int("HI"),
299+
b->mate_refId(), b->mate_start(), b->clipL, b->clipR);
297300
GFREE(pcigar);
298301
}
299302

@@ -305,7 +308,7 @@ int main(int argc, char* argv[]) {
305308

306309
// == Process arguments.
307310
GArgs args(argc, argv,
308-
"debug;help;version;viral;conservative;mix;cds=;keeptmp;rseq=;ptf=;bam;fr;rf;merge;"
311+
"debug;help;version;viral;conservative;mix;ref=;cram-ref=cds=;keeptmp;rseq=;ptf=;bam;fr;rf;merge;"
309312
"exclude=zihvteuLRx:n:j:s:D:G:C:S:l:m:o:a:j:c:f:p:g:P:M:Bb:A:E:F:T:");
310313
args.printError(USAGE, true);
311314

@@ -837,7 +840,6 @@ if (ballgown)
837840
fclose(fdbgaln);
838841
#endif
839842

840-
841843
#ifdef B_DEBUG
842844
fclose(dbg_out);
843845
#endif
@@ -1050,20 +1052,21 @@ void processOptions(GArgs& args) {
10501052
}
10511053
else if(mergeMode) mintranscriptlen=50;
10521054

1053-
/*
1054-
if (args.getOpt('S')) {
1055-
// sensitivitylevel=2; no longer supported from version 1.0.3
1056-
sensitivitylevel=1;
1057-
}
1058-
*/
1059-
10601055
s=args.getOpt("rseq");
10611056
if (s.is_empty())
10621057
s=args.getOpt('S');
10631058
if (!s.is_empty()) {
10641059
gfasta=new GFastaDb(s.chars());
10651060
}
10661061

1062+
//-- cram ref sequence
1063+
s=args.getOpt("ref");
1064+
if (s.is_empty())
1065+
s=args.getOpt("cram-ref");
1066+
if (!s.is_empty()) {
1067+
cram_ref=s;
1068+
}
1069+
10671070
/*traindir=args.getOpt("cds");
10681071
if(!traindir.is_empty()) {
10691072
if(gfasta==NULL) GError("Genomic sequence file is required for --cds option.\n");

tmerge.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include "tmerge.h"
22

3-
43
void TInputFiles::Add(const char* fn) {
54
GStr sfn(fn);
65
if (sfn!="-" && fileExists(fn)<2) {
@@ -136,9 +135,7 @@ int TInputFiles::start() {
136135
}
137136
//stringtie multi-BAM input
138137
for (int i=0;i<bamfiles.Count();++i) {
139-
//GSamReader* bamreader=new GSamReader(bamfiles[i].chars(),
140-
// (bamfiles[i]=="-" && forceBAM));
141-
GSamReader* bamreader=new GSamReader(bamfiles[i].chars());
138+
GSamReader* bamreader=new GSamReader(bamfiles[i].chars(), cram_ref.is_empty() ? NULL : cram_ref.chars());
142139
readers.Add(bamreader);
143140
GSamRecord* brec=bamreader->next();
144141
if (brec)

tmerge.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "GList.hh"
55
#include "rlink.h"
66
extern GStr tmp_path;
7+
extern GStr cram_ref;
78
extern bool keepTempFiles;
89

910
struct TInputRecord {

0 commit comments

Comments
 (0)