-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGatherBamFiles.java
More file actions
116 lines (101 loc) · 4.87 KB
/
GatherBamFiles.java
File metadata and controls
116 lines (101 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
package picard.sam;
import htsjdk.samtools.BamFileIoUtils;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;
import java.io.File;
import java.util.List;
/**
* Program to perform a rapid "gather" operation on BAM files after a scatter operations where
* the same process has been performed on different regions of a BAM file creating many smaller
* BAM files that now need to be concatenated back together.
*
* @author Tim Fennell
*/
@CommandLineProgramProperties(
usage = GatherBamFiles.USAGE_SUMMARY + GatherBamFiles.USAGE_DETAILS,
usageShort = GatherBamFiles.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class GatherBamFiles extends CommandLineProgram {
static final String USAGE_SUMMARY = "Concatenate one or more BAM files as efficiently as possible";
static final String USAGE_DETAILS = "This tool performs a rapid \"gather\" operation on BAM files after scatter" +
" operations where the same process has been performed on different regions of a BAM file creating many " +
"smaller BAM files that now need to be concatenated (reassembled) back together." +
"<br /><br />" +
"Assumes that the list of BAM files provided as INPUT are in the order that they should be concatenated and" +
" simply concatenates the bodies of the BAM files while retaining the header from the first file. " +
"Operates via copying of the gzip blocks directly for speed but also supports generation of an MD5 on the" +
" output and indexing of the output BAM file. Only supports BAM files, does not support SAM files." +
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar GatherBamFiles \\<br /> " +
" I=input1.bam \\ <br /> " +
" I=input2.bam \\ <br /> " +
" O=gathered_files.bam" +
"</pre> " +
"<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
doc = "Two or more BAM files or text files containing lists of BAM files (one per line).")
public List<File> INPUT;
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output BAM file to write.")
public File OUTPUT;
private static final Log log = Log.getInstance(GatherBamFiles.class);
// Stock main method.
public static void main(final String[] args) {
final GatherBamFiles gatherer = new GatherBamFiles();
gatherer.CREATE_INDEX = true;
gatherer.instanceMainWithExit(args);
}
@Override
protected int doWork() {
final List<File> inputs = IOUtil.unrollFiles(INPUT, BamFileIoUtils.BAM_FILE_EXTENSION, ".sam");
for (final File f : inputs) IOUtil.assertFileIsReadable(f);
IOUtil.assertFileIsWritable(OUTPUT);
if (determineBlockCopyingStatus(inputs)) {
BamFileIoUtils.gatherWithBlockCopying(inputs, OUTPUT, CREATE_INDEX, CREATE_MD5_FILE);
} else {
gatherNormally(inputs, OUTPUT, CREATE_INDEX, CREATE_MD5_FILE, REFERENCE_SEQUENCE);
}
return 0;
}
private boolean determineBlockCopyingStatus(final List<File> inputs) {
boolean useBlockCopying = true;
for (final File f : inputs) {
if (!BamFileIoUtils.isBamFile(f)) {
useBlockCopying = false;
}
}
return useBlockCopying;
}
/**
* Simple implementation of a gather operations that uses SAMFileReaders and Writers in order to concatenate
* multiple BAM files.
*/
private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5,
final File referenceFasta) {
final SAMFileHeader header;
{
header = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).getFileHeader(inputs.get(0));
}
final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output);
for (final File f : inputs) {
log.info("Gathering " + f.getAbsolutePath());
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f);
for (final SAMRecord rec : in) out.addAlignment(rec);
CloserUtil.close(in);
}
out.close();
}
}