Skip to content

Commit 9f2682a

Browse files
committed
Add SAMPE/BAMPE file loader
This file loader fetch both reads of a pair to determine 5' position, it avoids the issue that we couldn't infer CIGAR string of read2 given only read1 in SAMFileHitLoader
1 parent 5b86aa2 commit 9f2682a

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

src/org/seqcode/deepseq/hitloaders/HitLoaderFactory.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ public HitLoader makeFileHitLoader(String filename, String format, boolean useNo
6262
if(!file.isFile() && !format.equals("HDF5Cache")){System.err.println("File not found: "+file.getName());System.exit(1);}
6363
if(format.equals("SAM") || format.equals("BAM")){
6464
currReader = new SAMFileHitLoader(file,useNonUnique, econfig.getLoadType1Reads(), econfig.getLoadType2Reads(), econfig.getLoadRead2(), econfig.getLoadPairs());
65+
}if(format.equals("SAMPE") || format.equals("BAMPE")){
66+
currReader = new SAMFilePEHitLoader(file,useNonUnique, econfig.getLoadType1Reads(), econfig.getLoadType2Reads());
6567
}else if(format.equals("TOPSAM")){
6668
currReader = new TophatFileHitLoader(file,useNonUnique, econfig.getLoadType1Reads(), econfig.getLoadType2Reads(), econfig.getLoadRead2(), econfig.getLoadPairs());
6769
}else if(format.equals("NOVO")){
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package org.seqcode.deepseq.hitloaders;
2+
3+
import java.io.File;
4+
import java.io.IOException;
5+
import java.util.ArrayList;
6+
import java.util.HashMap;
7+
import java.util.Map;
8+
import java.util.Collection;
9+
10+
import org.seqcode.deepseq.HitPair;
11+
import org.seqcode.deepseq.Read;
12+
import org.seqcode.deepseq.ReadHit;
13+
14+
import htsjdk.samtools.SAMRecord;
15+
import htsjdk.samtools.SamReader;
16+
import htsjdk.samtools.ValidationStringency;
17+
import htsjdk.samtools.SamReaderFactory;
18+
import htsjdk.samtools.util.CloseableIterator;
19+
20+
21+
/**
22+
* SAMFilePairHitLoader: A FileHitLoader for pair-end SAM and BAM files.
23+
* Accounts for uniqueness of hits according to user-specified option.
24+
* Ignores secondary & supplementary (i.e. chimeric) alignments.
25+
* Assume SAM/BAM file is sorted
26+
*
27+
* @author jianyu
28+
*
29+
*/
30+
public class SAMFilePEHitLoader extends FileHitLoader{
31+
32+
private boolean useChimericReads=false; //Ignore chimeric mappings for now.
33+
34+
public SAMFilePEHitLoader(File f, boolean nonUnique, boolean loadT1Reads, boolean loadT2Reads) {
35+
super(f, nonUnique, true, false, true, true);
36+
if(!loadT1Reads || loadT2Reads)
37+
System.err.println("SAMFileHitLoader: You asked to load only Type1 or Type2 reads, we do not yet load this information from SAM format.");
38+
}
39+
40+
/**
41+
* Get the reads from the appropriate source (implementation-specific).
42+
* Loads pairs to hitPairsList
43+
*/
44+
public void sourceAllHits() {
45+
this.initialize();
46+
SamReaderFactory factory =
47+
SamReaderFactory.makeDefault()
48+
.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS,
49+
SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS)
50+
.validationStringency(ValidationStringency.SILENT);
51+
SamReader reader = factory.open(file);
52+
CloseableIterator<SAMRecord> iter = reader.iterator();
53+
Map<String, SAMRecord> byRead = new HashMap<String, SAMRecord>();
54+
55+
SAMRecord r1Record;
56+
SAMRecord r2Record;
57+
while (iter.hasNext()) {
58+
SAMRecord record = iter.next();
59+
60+
if(record.getReadUnmappedFlag()) {continue; }
61+
if(record.isSecondaryOrSupplementary() && !useChimericReads){continue;}
62+
63+
if(byRead.containsKey(record.getReadName())) {
64+
if (record.getFirstOfPairFlag()) {
65+
r1Record = record;
66+
r2Record = byRead.remove(record.getReadName());
67+
} else {
68+
r1Record = byRead.remove(record.getReadName());
69+
r2Record = record;
70+
}
71+
boolean neg = r1Record.getReadNegativeStrandFlag();
72+
boolean mateneg = r2Record.getReadNegativeStrandFlag();
73+
74+
// skip the pair if it's not aligned correctly
75+
if (!r1Record.getReferenceName().equals(r2Record.getReferenceName())) {continue;}
76+
if (neg == mateneg) {continue;}
77+
78+
HitPair hp = new HitPair((neg ? r1Record.getAlignmentEnd() : r1Record.getAlignmentStart()),
79+
r2Record.getReferenceName().replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", ""),
80+
(mateneg ? r2Record.getAlignmentEnd() : r2Record.getAlignmentStart()),
81+
mateneg ? 1 : 0,
82+
1);
83+
addPair(r1Record.getReferenceName().replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", ""), neg ? '-':'+', hp);
84+
85+
} else {
86+
byRead.put(record.getReadName(), record);
87+
}
88+
// add the hit
89+
// ReadHit currHit = new ReadHit(
90+
// record.getReferenceName().replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", ""),
91+
// record.getAlignmentStart(), record.getAlignmentEnd(),
92+
// record.getReadNegativeStrandFlag() ? '-' : '+',
93+
// 1);
94+
// Read currRead = new Read();
95+
// currRead.addHit(currHit);
96+
// currRead.setNumHits(1);
97+
// addHits(currRead);
98+
}
99+
100+
iter.close();
101+
try {
102+
reader.close();
103+
} catch (IOException e) {
104+
e.printStackTrace();
105+
}
106+
}//end of sourceAllHits method
107+
108+
}

0 commit comments

Comments
 (0)