-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFilterSamReads.java
More file actions
269 lines (231 loc) · 11.1 KB
/
FilterSamReads.java
File metadata and controls
269 lines (231 loc) · 11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*/
/**
* $Id$
*/
package picard.sam;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.filter.AlignedFilter;
import htsjdk.samtools.filter.FilteringIterator;
import htsjdk.samtools.filter.JavascriptSamRecordFilter;
import htsjdk.samtools.filter.ReadNameFilter;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
/**
 * From a SAM or BAM file, produce a new SAM or BAM by filtering aligned reads, a list of read
 * names provided in a file (one readname per line), or reads accepted by a javascript file.
 * <p/>
 * The filter to apply is selected with the {@link #FILTER} option; see {@link Filter} for the
 * supported values.
 * <p/>
 * $Id$
 */
@CommandLineProgramProperties(
        usage = FilterSamReads.USAGE_SUMMARY + FilterSamReads.USAGE_DETAILS,
        usageShort = FilterSamReads.USAGE_SUMMARY,
        programGroup = SamOrBam.class
)
public class FilterSamReads extends CommandLineProgram {
    /** One-line summary shown as the short usage text. */
    static final String USAGE_SUMMARY = "Subset read data from a SAM or BAM file";

    /** Detailed (HTML-formatted) usage text appended to {@link #USAGE_SUMMARY}. */
    static final String USAGE_DETAILS = "This tool takes a SAM or BAM file and subsets it to a new file that either excludes or " +
            "only includes either aligned or unaligned reads (set using FILTER), or specific reads based on a list of reads names " +
            "supplied in the READ_LIST_FILE. " +
            "" +
            "<h4>Usage example:</h4>" +
            "<pre>" +
            "java -jar picard.jar FilterSamReads \\<br /> " +
            " I=input.bam \\ <br /> " +
            " O=output.bam \\<br /> " +
            " READ_LIST_FILE=read_names.txt" +
            " FILTER=filter_value" +
            "</pre> " +
            "For information on the SAM format, please see: http://samtools.sourceforge.net" +
            "<hr />";

    private static final Log log = Log.getInstance(FilterSamReads.class);

    /**
     * The filtering strategies selectable through the {@code FILTER} option. Each constant
     * carries a human-readable description that is included in {@link #toString()}.
     */
    protected /* <- used in test */ enum Filter {
        includeAligned("OUTPUT SAM/BAM will contain aligned reads only. INPUT SAM/BAM must be in queryname SortOrder. (Note that *both* first and second of paired reads must be aligned to be included in the OUTPUT SAM or BAM)"),
        excludeAligned("OUTPUT SAM/BAM will contain un-mapped reads only. INPUT SAM/BAM must be in queryname SortOrder. (Note that *both* first and second of pair must be aligned to be excluded from the OUTPUT SAM or BAM)"),
        includeReadList("OUTPUT SAM/BAM will contain reads that are supplied in the READ_LIST_FILE file"),
        excludeReadList("OUTPUT bam will contain reads that are *not* supplied in the READ_LIST_FILE file"),
        includeJavascript("OUTPUT bam will contain reads that have been accepted by the JAVASCRIPT_FILE script.");

        private final String description;

        Filter(final String description) {
            this.description = description;
        }

        /** @return the constant name followed by its description in square brackets */
        @Override
        public String toString() {
            return this.name() + " [" + description + "]";
        }
    }

    /** The SAM or BAM file to filter (required). */
    @Option(doc = "The SAM or BAM file that will be filtered.",
            optional = false,
            shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
    public File INPUT;

    /** Which filter to apply (required). */
    @Option(doc = "Filter.", optional = false)
    public Filter FILTER = null;

    /** Read-name list; validated as mandatory for the includeReadList / excludeReadList filters. */
    @Option(doc = "Read List File containing reads that will be included or excluded from the OUTPUT SAM or BAM file.",
            optional = true,
            shortName = "RLF")
    public File READ_LIST_FILE;

    /** Optional sort order for OUTPUT; when null the INPUT header's sort order is kept. */
    @Option(
            doc = "SortOrder of the OUTPUT SAM or BAM file, otherwise use the SortOrder of the INPUT file.",
            optional = true, shortName = "SO")
    public SAMFileHeader.SortOrder SORT_ORDER;

    /** When true, a {@code .reads} dump is written for both INPUT and OUTPUT. */
    @Option(
            doc = "Create .reads files (for debugging purposes)",
            optional = true)
    public boolean WRITE_READS_FILES = true;

    /** Destination SAM or BAM file (required); must differ from INPUT. */
    @Option(doc = "SAM or BAM file to write read excluded results to",
            optional = false, shortName = "O")
    public File OUTPUT;

    /** Script file; validated as mandatory for the includeJavascript filter. */
    @Option(shortName = "JS",
            doc = "Filters a SAM or BAM file with a javascript expression using the java javascript-engine. "
                    + " The script puts the following variables in the script context: "
                    + " 'record' a SamRecord ( https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/samtools/SAMRecord.html ) and "
                    + " 'header' a SAMFileHeader ( https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/samtools/SAMFileHeader.html )."
                    + " Last value of the script should be a boolean to tell whether we should accept or reject the record.",
            optional = true)
    public File JAVASCRIPT_FILE = null;

    /**
     * Drains the given iterator into OUTPUT.
     * <p/>
     * The output header is the INPUT header, with its sort order replaced by SORT_ORDER when
     * that option is set. The iterator and the output writer are always closed, even when
     * reading or writing fails part-way through.
     *
     * @param filteringIterator iterator yielding the records that passed the filter; it is
     *                          closed by this method
     */
    private void filterReads(final FilteringIterator filteringIterator) {
        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Written");

        try {
            // get OUTPUT header from INPUT and overwrite it if necessary
            final SAMFileHeader fileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);

            final SAMFileHeader.SortOrder inputSortOrder = fileHeader.getSortOrder();
            if (SORT_ORDER != null) {
                fileHeader.setSortOrder(SORT_ORDER);
            }

            // Records only need re-sorting by the writer when the requested order differs from the input's.
            final boolean presorted = inputSortOrder.equals(fileHeader.getSortOrder());
            log.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() + " -> OUTPUT=" +
                    OUTPUT.getName() + " [sortorder=" + fileHeader.getSortOrder().name() + "]");

            // create OUTPUT file; try-with-resources guarantees the writer is closed on failure too
            try (final SAMFileWriter outputWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, presorted, OUTPUT)) {
                while (filteringIterator.hasNext()) {
                    final SAMRecord rec = filteringIterator.next();
                    outputWriter.addAlignment(rec);
                    progress.record(rec);
                }
            }
        } finally {
            filteringIterator.close();
        }

        log.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName());
    }

    /**
     * Write out a file of reads for debugging purposes: one line per record, holding the
     * record's {@code toString()} value. The file is named after the SAM/BAM file's basename
     * with a {@code .reads} suffix and is placed in the parent directory of OUTPUT.
     *
     * @param samOrBamFile The SAM or BAM file for which we are going to write out a file of its
     *                     containing reads
     * @throws IOException if the reads file cannot be written or the reader cannot be closed
     */
    private void writeReadsFile(final File samOrBamFile) throws IOException {
        final File readsFile =
                new File(OUTPUT.getParentFile(), IOUtil.basename(samOrBamFile) + ".reads");
        // Check the destination before opening anything so a failure here cannot leak a reader.
        IOUtil.assertFileIsWritable(readsFile);

        try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(samOrBamFile);
             final BufferedWriter bw = IOUtil.openFileForBufferedWriting(readsFile, false)) {
            for (final SAMRecord rec : reader) {
                bw.write(rec.toString() + "\n");
            }
        }

        IOUtil.assertFileIsReadable(readsFile);
    }

    /**
     * Runs the tool: validates the files, optionally dumps the INPUT reads, filters INPUT into
     * OUTPUT according to FILTER, and optionally dumps the OUTPUT reads.
     * <p/>
     * On any exception the OUTPUT file is deleted if it exists, and the error is logged.
     *
     * @return 0 on success, 1 if any exception was raised
     */
    @Override
    protected int doWork() {
        try {
            IOUtil.assertFileIsReadable(INPUT);
            IOUtil.assertFileIsWritable(OUTPUT);
            if (WRITE_READS_FILES) {
                writeReadsFile(INPUT);
            }

            // The reader is closed when this block exits, whether filtering succeeded or threw.
            try (final SamReader samReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT)) {
                final FilteringIterator filteringIterator;

                switch (FILTER) {
                    case includeAligned:
                        // NOTE(review): the trailing 'true' is presumably htsjdk's filter-by-pair flag,
                        // matching the queryname-sorted requirement in the Filter description - confirm.
                        filteringIterator = new FilteringIterator(samReader.iterator(),
                                new AlignedFilter(true), true);
                        break;
                    case excludeAligned:
                        filteringIterator = new FilteringIterator(samReader.iterator(),
                                new AlignedFilter(false), true);
                        break;
                    case includeReadList:
                        filteringIterator = new FilteringIterator(samReader.iterator(),
                                new ReadNameFilter(READ_LIST_FILE, true));
                        break;
                    case excludeReadList:
                        filteringIterator = new FilteringIterator(samReader.iterator(),
                                new ReadNameFilter(READ_LIST_FILE, false));
                        break;
                    case includeJavascript:
                        filteringIterator = new FilteringIterator(samReader.iterator(),
                                new JavascriptSamRecordFilter(
                                        JAVASCRIPT_FILE,
                                        samReader.getFileHeader()
                                ));
                        break;
                    default:
                        throw new UnsupportedOperationException(FILTER.name() + " has not been implemented!");
                }

                filterReads(filteringIterator);
            }

            IOUtil.assertFileIsReadable(OUTPUT);
            if (WRITE_READS_FILES) {
                writeReadsFile(OUTPUT);
            }
            return 0;

        } catch (Exception e) {
            // Do not leave a partial or corrupt OUTPUT behind.
            if (OUTPUT.exists() && !OUTPUT.delete()) {
                log.warn("Failed to delete " + OUTPUT.getAbsolutePath());
            }

            log.error(e, "Failed to filter " + INPUT.getName());
            return 1;
        }
    }

    /**
     * Checks option combinations that the option annotations cannot express: INPUT and OUTPUT
     * must differ, the read-list filters need READ_LIST_FILE, and the javascript filter needs
     * JAVASCRIPT_FILE.
     *
     * @return a one-element array holding the first problem found, otherwise whatever the
     *         superclass validation returns
     */
    @Override
    protected String[] customCommandLineValidation() {
        if (INPUT.equals(OUTPUT)) {
            return new String[]{"INPUT file and OUTPUT file must differ!"};
        }

        if ((FILTER == Filter.includeReadList ||
                FILTER == Filter.excludeReadList) &&
                READ_LIST_FILE == null) {
            return new String[]{"A READ_LIST_FILE must be specified when using the " + FILTER.name() + " option"};
        }

        // Fail fast with a clear message instead of failing later inside doWork().
        if (FILTER == Filter.includeJavascript && JAVASCRIPT_FILE == null) {
            return new String[]{"A JAVASCRIPT_FILE must be specified when using the " + FILTER.name() + " option"};
        }

        return super.customCommandLineValidation();
    }

    /**
     * Stock main method.
     *
     * @param args main arguments
     */
    public static void main(final String[] args) {
        System.exit(new FilterSamReads().instanceMain(args));
    }
}