-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbinner
More file actions
executable file
·473 lines (432 loc) · 24.7 KB
/
binner
File metadata and controls
executable file
·473 lines (432 loc) · 24.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
#!/bin/bash
# written by Philipp Resl, Oct. 2019, github.com/reslp/binner
# binners to add:
# https://github.com/younglululu/COCACOLA
# https://github.com/edgraham/BinSanity
# https://sourceforge.net/projects/sb2nhri/files/MyCC/
# https://sourceforge.net/p/metawatt/wiki/Home/
# https://github.com/cmks/DAS_Tool
set -e
#######################################
# Print the help text and terminate the script.
# usage() is only reached on error paths (no arguments, illegal option), so
# the whole text is written to stderr.
# Arguments: none ($0 is used as the program name in the synopsis)
# Outputs:   help text on stderr
# Returns:   never returns; exits with status 1
#######################################
usage() {
  cat >&2 <<EOF
Welcome to binner. A script to quickly run metagenomic binning software using Docker.

Usage: $0 [-v] [-a <assembly_file>] [-f <read_file1>] [-r <read_file2>] [-m maxbin,metabat,blobtools,concoct] [-t nthreads] [[--diamonddb=/path/to/diamonddb --protid=/path/to/prot.accession2taxid]] [-q] [-b [--buscosets=set1,set2]]

Options:
 -a <assembly_file> Path to assembly file in FASTA format
 -f <read_file1> Path to forward read file in FASTQ format (can be gzipped)
 -r <read_file2> Path to reverse read file in FASTQ format (can be gzipped)
 IMPORTANT: Currently the assembly and read files need to be in the same directory which has to be the directory in which binner is run.
 -m <maxbin,metabat,blobtools,concoct> specify binning software to run.
 Separate multiple options by a , (eg. -m maxbin,blobtools).
 -o Output directory. Default: Current working directory
 -t number of threads for multi-threaded parts
 -q run QUAST on the binned sets of contigs
 -b run BUSCO on the binned sets of contigs. See additional details below.
 --multiqc Run multiqc after all binning steps to create a summary report on all the bins. This should be used together with -q, -b or both.
 --singularity Use singularity instead of Docker to run containers.
 --force Override existing output

 -v Display program version and versions of used additional software.

Options specific to blobtools:
 The blobtools container used here uses diamond instead of blast to increase speed.
 Options needed when blobtools should be run. The blobtools container used here uses diamond instead of blast to increase speed.
 --diamonddb= full (absolute) path to diamond database
 --protid= full (absolute) path to prot.accession2taxid file provided by NCBI

Options needed when BUSCO analysis of contigs should be performed:
 --buscosets= BUSCO sets which should be tested. This will be run for each set of contigs. Individual sets should be comma
 separated. eg. --buscosets=fungi_odb9,bacteria_odb9,insects_odb9 .
 Running this assumes that folders with the busco sets exist in the current working directory. They should have
 the same name as passed to the --buscosets command. If they are not found binner will try to download them
 from the BUSCO website.
EOF
  exit 1
}
# Versions of the used containers. Each value is used as the image tag of the
# corresponding reslp/<tool> container and is printed by `binner -v`.
# They never change at runtime, hence readonly.
readonly QUAST_VERSION="5.0.2"
readonly BUSCO_VERSION="3.0.2"
readonly MULTIQC_VERSION="1.6"
readonly BOWTIE2_VERSION="2.3.5"
readonly SAMTOOLS_VERSION="1.9"
readonly DIAMOND_VERSION="2.0.7"
readonly MAXBIN_VERSION="2.2.6"
readonly METABAT_VERSION="2.13"
readonly CONCOCT_VERSION="1.1"
readonly BLOBTOOLS_VERSION="1.1.1"
# Container runtime switch: "no" = Docker (default), "yes" = Singularity.
# Deliberately NOT readonly: the --singularity option overrides it.
SINGULARITY="no"
#######################################
# Print binner's version and the version of every additional tool (taken
# from the *_VERSION variables), then end the script successfully.
# Outputs: version report on stdout
# Returns: never returns; exits with status 0
#######################################
version() {
  cat <<EOF
binner v0.3 - https://github.com/reslp/binner

Additional software used:
 Quast: $QUAST_VERSION (http://quast.sourceforge.net/quast)
 Busco: $BUSCO_VERSION (https://busco-archive.ezlab.org/v3/)
 MultiQC: $MULTIQC_VERSION (https://multiqc.info/)
 bowtie2: $BOWTIE2_VERSION (http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)
 samtools: $SAMTOOLS_VERSION (http://www.htslib.org/)
 diamond: $DIAMOND_VERSION (http://www.diamondsearch.org/index.php)
 MaxBin: $MAXBIN_VERSION (https://sourceforge.net/projects/maxbin/)
 MetaBAT: $METABAT_VERSION (https://bitbucket.org/berkeleylab/metabat/src/master/)
 CONCOCT: $CONCOCT_VERSION (https://github.com/BinPro/CONCOCT)
 BlobTools: $BLOBTOOLS_VERSION (https://blobtools.readme.io/docs)
EOF
  exit 0
}
# Defaults for optional settings; parse_options may override them.
QUAST=""
BUSCO=""
MULTIQC=""
THREADS=1
FORCE="no"

#######################################
# Parse the command line into global configuration variables.
# Globals written: ASSEMBLY, R1, R2, OUTDIR, OPTIONS, THREADS, PROTID, QUAST,
#                  BUSCO, DIAMONDDB, BUSCOS, MULTIQC, SINGULARITY, FORCE,
#                  LONG_OPTARG, OPTIND
# Arguments: the script's command-line arguments ("$@")
# Outputs:   an error message on stderr for unknown options or missing
#            option arguments, followed by the help text via usage
# Returns:   0 on success; calls version (for -v) or usage on the paths
#            described above, both of which end the script
#######################################
parse_options() {
  local option
  OPTIND=1
  # Leading ':' selects getopts' silent error mode: an unknown option yields
  # '?' and a missing argument yields ':' (OPTARG holds the option letter).
  # The '-:' entry makes "--name[=value]" arrive as option '-' with
  # OPTARG="name[=value]", which is how long options are emulated here.
  while getopts ":t:m:a:o:f:r:vp:-:qb" option; do
    case "${option}" in
      a) ASSEMBLY=${OPTARG} ;;
      f) R1=${OPTARG} ;;
      r) R2=${OPTARG} ;;
      o) OUTDIR=${OPTARG} ;;
      v) version ;;
      m) OPTIONS=${OPTARG} ;;
      t) THREADS=${OPTARG} ;;
      p) PROTID=${OPTARG} ;;
      q) QUAST="quast" ;;
      b) BUSCO="busco" ;;
      -)
        LONG_OPTARG="${OPTARG#*=}"
        case "$OPTARG" in
          diamonddb=?*) DIAMONDDB="$LONG_OPTARG" ;;
          protid=?*) PROTID="$LONG_OPTARG" ;;
          buscosets=?*) BUSCOS="$LONG_OPTARG" ;;
          multiqc) MULTIQC="multiqc" ;;
          singularity) SINGULARITY="yes" ;;
          force) FORCE="yes" ;;
          '') break ;;
          *)
            echo "Illegal option --$OPTARG" >&2
            usage
            ;;
        esac
        ;;
      :)
        echo "Option -$OPTARG requires an argument" >&2
        usage
        ;;
      *)
        echo "Illegal option -$OPTARG" >&2
        usage
        ;;
    esac
  done
  # OPTIND is still 1 when no option at all was consumed: show the help.
  if [ "$OPTIND" -eq 1 ]; then usage; fi
}
parse_options "$@"
#echo $OPTIONS
#######################################
# Print the absolute path of a file or directory.
# Adapted from https://stackoverflow.com/questions/3915040/bash-fish-command-to-print-absolute-path-to-a-file
# Arguments: $1 - relative or absolute path. Either the path itself is an
#                 existing directory, or its parent directory must exist
#                 (the last path component itself does not have to exist).
# Outputs:   the absolute path on stdout; an error message on stderr when
#            the path cannot be resolved
# Returns:   0 on success, 1 when neither the path nor its parent exists
#######################################
get_abs_filename() {
  local filename=$1
  local parentdir resolved
  if [ -d "${filename}" ]; then
    resolved=$(cd "${filename}" && pwd) || return 1
    printf '%s\n' "${resolved}"
    return 0
  fi
  parentdir=$(dirname "${filename}")
  if [ -d "${parentdir}" ]; then
    resolved=$(cd "${parentdir}" && pwd) || return 1
    # Drop a trailing slash so that a parent of "/" gives "/name", not "//name".
    printf '%s/%s\n' "${resolved%/}" "$(basename "${filename}")"
    return 0
  fi
  echo "(binner) Error: cannot resolve path: ${filename}" >&2
  return 1
}
# Resolve the absolute directories of the read and assembly files (they are
# mounted into the containers) and switch to the output directory.

# The three input files are mandatory; fail early with a clear message
# instead of a "missing operand" error from basename/dirname further down.
if [[ -z "${ASSEMBLY:-}" || -z "${R1:-}" || -z "${R2:-}" ]]; then
  echo "(binner) Error: an assembly (-a), a forward read file (-f) and a reverse read file (-r) are required." >&2
  exit 1
fi

# Print the absolute directory that contains the path given as $1.
# Exits with status 1 when get_abs_filename cannot resolve the path.
abs_dir_of() {
  local resolved
  resolved=$(get_abs_filename "$1") || resolved=""
  if [[ -z "$resolved" ]]; then
    echo "(binner) Error: cannot resolve the location of: $1" >&2
    exit 1
  fi
  dirname "$resolved"
}

RF1PATH=$(abs_dir_of "$R1") || exit 1
RF2PATH=$(abs_dir_of "$R2") || exit 1
RF1=$(basename "$R1")
RF2=$(basename "$R2")
ASSEMBLYPATH=$(abs_dir_of "$ASSEMBLY") || exit 1
ASSEMBLY=$(basename "$ASSEMBLY")
echo "$ASSEMBLYPATH"
echo "$ASSEMBLY"
# Passed (unquoted) as global flag to every "singularity $SILENT exec ..." call.
SILENT="-s"
if [[ -z "${OUTDIR:-}" ]]; then
  OUTDIR=$(pwd)
else
  # Create the directory first so that nested, not yet existing output
  # directories (e.g. -o results/run1) can be resolved as well.
  mkdir -p "$OUTDIR"
  OUTDIR=$(cd "$OUTDIR" && pwd) || exit 1
fi
echo "(binner) Output will be placed in: $OUTDIR"
mkdir -p "$OUTDIR"
cd "$OUTDIR" || exit 1
if [[ "$SINGULARITY" == "yes" ]]; then
  echo "Will be using Singularity instead of Docker. This is still experimental."
fi
#######################################
# Make sure every requested BUSCO lineage set exists as a directory in the
# current working directory; download and unpack the ones that are missing.
# Globals:   BUSCOS (read) - comma separated list of set names
# Outputs:   progress messages on stdout, a warning on stderr when no sets
#            were requested
# Returns:   non-zero if a download or extraction fails
# NOTE(review): the download URL is kept as it was; whether this location
# still serves the odb9 sets should be confirmed.
#######################################
fetch_missing_busco_sets() {
  local BUSC
  if [[ -z "${BUSCOS:-}" ]]; then
    echo "(binner) Warning: -b was given without --buscosets; no BUSCO sets to check." >&2
    return 0
  fi
  # Unquoted on purpose: the comma separated list is split into words.
  echo "(binner) Checking if specified BUSCO sets are available: " ${BUSCOS//,/ }
  for BUSC in ${BUSCOS//,/ }; do
    if [[ -d "$BUSC" ]]; then
      echo "(binner) BUSCO set $BUSC is present."
    else
      echo "(binner) $BUSC not found. Will try to download from http://busco.ezlab.org/v2/datasets/$BUSC.tar.gz"
      wget "http://busco.ezlab.org/v2/datasets/${BUSC}.tar.gz" || return 1
      tar xfz "${BUSC}.tar.gz" || return 1
      rm "${BUSC}.tar.gz"
    fi
  done
}

if [ "$BUSCO" == "busco" ]; then
  fetch_missing_busco_sets
fi
# On Linux, files created through docker are owned by root by default, so the
# containers are started with the caller's UID:GID there. The value is
# expanded unquoted in the docker calls, i.e. as two separate words.
unset DOCKER_USER
case "$OSTYPE" in
  linux-gnu)
    DOCKER_USER="--user $(id -u):$(id -g)"
    ;;
  darwin*)
    # nothing to be done on MacOS
    DOCKER_USER=""
    ;;
esac
# --- Read mapping ---------------------------------------------------------
# Produces <assembly>.bam (coordinate sorted) plus its index in the output
# directory. The whole step is skipped when the BAM file already exists.
# $SILENT and $DOCKER_USER are expanded unquoted on purpose (they hold
# zero or more separate command-line words).
if [[ ! -f "$ASSEMBLY".bam ]]; then
  echo "(binner) No BAM file found. Will perform read mapping with bowtie2 ..."
  if [[ ! -f "$ASSEMBLY".index.1.bt2 ]]; then
    echo "(binner) No Bowtie2 index file found. Creating Bowtie2 index..."
    if [[ "$SINGULARITY" == "yes" ]]; then
      # $SILENT is a global singularity flag and therefore goes before "exec",
      # like in every other singularity call of this script.
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly/ docker://reslp/bowtie2:"$BOWTIE2_VERSION" bowtie2-build /assembly/"$ASSEMBLY" /data/"$ASSEMBLY".index -q
    else
      docker run -t --rm $DOCKER_USER -v "$(pwd)":/data/ -v "$ASSEMBLYPATH":/assembly/ reslp/bowtie2:"$BOWTIE2_VERSION" bowtie2-build /assembly/"$ASSEMBLY" /data/"$ASSEMBLY".index -q
    fi
  fi
  echo "(binner) Mapping reads ..."
  if [[ "$SINGULARITY" == "yes" ]]; then
    echo "$RF1PATH"
    # The read directories are bound under their host paths.
    singularity $SILENT exec -B "$(pwd)":/data -B "$RF1PATH" -B "$RF2PATH" docker://reslp/bowtie2:"$BOWTIE2_VERSION" bowtie2 -p "$THREADS" -q --phred33 --fr -x /data/"$ASSEMBLY".index -1 "$RF1PATH"/"$RF1" -2 "$RF2PATH"/"$RF2" -S /data/"$ASSEMBLY".sam --quiet
  else
    docker run -t --rm $DOCKER_USER -v "$(pwd)":/data/ -v "$RF1PATH":/RF1/ -v "$RF2PATH":/RF2/ reslp/bowtie2:"$BOWTIE2_VERSION" bowtie2 -p "$THREADS" -q --phred33 --fr -x /data/"$ASSEMBLY".index -1 /RF1/"$RF1" -2 /RF2/"$RF2" -S /data/"$ASSEMBLY".sam --quiet
  fi
  echo "(binner) Converting SAM to BAM ..."
  # samtools sort reads the SAM file directly and writes sorted BAM. This
  # replaces the former "view" + in-place "sort -o X.bam X.bam", which
  # overwrote its own input file.
  if [[ "$SINGULARITY" == "yes" ]]; then
    singularity $SILENT exec -B "$(pwd)":/data docker://reslp/samtools:"$SAMTOOLS_VERSION" samtools sort -o /data/"$ASSEMBLY".bam /data/"$ASSEMBLY".sam
    echo "(binner) Will index BAM file ..."
    singularity $SILENT exec -B "$(pwd)":/data docker://reslp/samtools:"$SAMTOOLS_VERSION" samtools index /data/"$ASSEMBLY".bam
  else
    docker run -t --rm $DOCKER_USER -v "$(pwd)":/data/ reslp/samtools:"$SAMTOOLS_VERSION" samtools sort -o /data/"$ASSEMBLY".bam /data/"$ASSEMBLY".sam
    echo "(binner) Will index BAM file ..."
    docker run -t --rm $DOCKER_USER -v "$(pwd)":/data/ reslp/samtools:"$SAMTOOLS_VERSION" samtools index /data/"$ASSEMBLY".bam
  fi
fi
# --- MaxBin ---------------------------------------------------------------
# Runs when "maxbin" is part of -m. An existing maxbin/ directory makes the
# step a no-op unless --force was given. Bins end up as maxbin/*.fasta.
# $SILENT and $DOCKER_USER are expanded unquoted on purpose.
if [[ $OPTIONS == *"maxbin"* ]]; then
  echo "(binner) Will run MaxBin"
  if [[ -d maxbin && $FORCE != "yes" ]]; then
    echo "(binner) Maxbin output directory already exists. Will skip."
  else
    mkdir -p maxbin
    # these docker commands are not optimal because they create files as the root user.
    # passing UID and GID doesn't work in this case because of the way maxbin is set up.
    # I have not yet found a way around this.
    echo "(binner) Extracting coverage information from BAM file"
    if [[ "$SINGULARITY" == "yes" ]]; then
      singularity $SILENT exec -B "$(pwd)":/data docker://reslp/samtools:"$SAMTOOLS_VERSION" samtools idxstats /data/"$ASSEMBLY".bam > maxbin/"$ASSEMBLY".idxstats
      # keep contig name (column 1) and number of mapped reads (column 3)
      cut -f1,3 maxbin/"$ASSEMBLY".idxstats > maxbin/"$ASSEMBLY".counts
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH" docker://reslp/maxbin:"$MAXBIN_VERSION" run_MaxBin.pl -contig "$ASSEMBLYPATH"/"$ASSEMBLY" -abund /data/maxbin/"$ASSEMBLY".counts -thread "$THREADS" -out /data/maxbin/maxbin_bin_out
    else
      # No -t here: stdout is redirected into a file, and a pseudo-TTY would
      # rewrite the line endings of the captured output.
      docker run --rm -v "$(pwd)":/data/ reslp/samtools:"$SAMTOOLS_VERSION" samtools idxstats /data/"$ASSEMBLY".bam > maxbin/"$ASSEMBLY".idxstats
      cut -f1,3 maxbin/"$ASSEMBLY".idxstats > maxbin/"$ASSEMBLY".counts
      # The assembly is mounted at /assembly/ (it need not live in the output
      # directory), so that is where MaxBin has to read it from.
      docker run --rm -t -v "$(pwd)":/data/ -v "$ASSEMBLYPATH":/assembly/ reslp/maxbin:"$MAXBIN_VERSION" run_MaxBin.pl -contig /assembly/"$ASSEMBLY" -abund /data/maxbin/"$ASSEMBLY".counts -thread "$THREADS" -out /data/maxbin/maxbin_bin_out
    fi
    if [ "$BUSCO" == "busco" ]; then
      mkdir -p maxbin/tmp
      for f in maxbin/*.fasta; do
        [[ -e "$f" ]] || continue # glob matched nothing
        for BUSC in ${BUSCOS//,/ }; do
          echo "(binner) Run BUSCO $BUSC for $f"
          if [[ "$SINGULARITY" == "yes" ]]; then
            # Same approach as for the blobtools bins: augustus needs a writable
            # config folder, and the environment/working directory must be set
            # through singularity options ("export ... && cd ..." appended to
            # the command line would be executed by the host shell instead).
            if [ ! -d "$(pwd)/config" ]; then
              echo "(binner) Singularity specific Will copy augustus config path folder to writable location"
              singularity exec -B "$(pwd)":/data --env AUGUSTUS_CONFIG_PATH="/opt/conda/config" docker://reslp/busco:"$BUSCO_VERSION" cp -r /opt/conda/config /data/config
            fi
            singularity $SILENT exec -B "$(pwd)":/data --pwd /data/maxbin --env AUGUSTUS_CONFIG_PATH="/data/config" docker://reslp/busco:"$BUSCO_VERSION" run_busco -i "$(basename "$f")" --out "$BUSC"_"$(basename "$f")" -c "$THREADS" --lineage_path "/data/$BUSC" -m genome -q
          else
            docker run -t --rm $DOCKER_USER -v "$(pwd)":/data/ -e AUGUSTUS_CONFIG_PATH="/opt/conda/config" -w /data/maxbin reslp/busco:"$BUSCO_VERSION" run_busco -i "$(basename "$f")" --out "$BUSC"_"$(basename "$f")" -c "$THREADS" --lineage_path ../"$BUSC" -m genome -q
          fi
        done
      done
    fi
  fi # check outdir
fi
# --- MetaBAT --------------------------------------------------------------
# Runs when "metabat" is part of -m. An existing metabat/ directory makes the
# step a no-op unless --force was given. Bins end up as metabat/*.fa.
# $SILENT and $DOCKER_USER are expanded unquoted on purpose.
if [[ $OPTIONS == *"metabat"* ]]; then
  echo "(binner) Will run MetaBat"
  if [[ -d metabat && $FORCE != "yes" ]]; then
    echo "(binner) MetaBat output directory already exists. Will skip."
  else
    mkdir -p metabat
    if [[ "$SINGULARITY" == "yes" ]]; then
      singularity $SILENT exec -B "$(pwd)":/data docker://reslp/metabat:"$METABAT_VERSION" jgi_summarize_bam_contig_depths --outputDepth /data/metabat/metabat_depth.txt --pairedContigs /data/metabat/metabat_paired.txt /data/"$ASSEMBLY".bam
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/metabat:"$METABAT_VERSION" metabat2 -i /assembly/"$ASSEMBLY" -a /data/metabat/metabat_depth.txt -t "$THREADS" -o /data/metabat/metabat --sensitive
    else
      docker run -t $DOCKER_USER -v "$(pwd)":/data/ --rm reslp/metabat:"$METABAT_VERSION" jgi_summarize_bam_contig_depths --outputDepth /data/metabat/metabat_depth.txt --pairedContigs /data/metabat/metabat_paired.txt /data/"$ASSEMBLY".bam
      # -t "$THREADS" added so that the docker path honours -t like the
      # singularity path does.
      docker run -t $DOCKER_USER -w /data/metabat/ -v "$(pwd)":/data/ -v "$ASSEMBLYPATH":/assembly/ --rm reslp/metabat:"$METABAT_VERSION" metabat2 -i /assembly/"$ASSEMBLY" -a /data/metabat/metabat_depth.txt -t "$THREADS" -o metabat --sensitive
    fi
    # Disabled per-binner QUAST run (kept for reference):
    # if [ "$QUAST" == "quast" ]; then
    # echo "(binner) Will run QUAST for MetaBat bins"
    # if [[ "$SINGULARITY" == "yes" ]]; then
    # singularity exec -B $(pwd):/data docker://reslp/quast:$QUAST_VERSION quast.py -o /data/metabat/quast_metabat --silent
    # else
    # docker run --rm -t -v $(pwd):/data/ $DOCKER_USER reslp/quast:QUAST_VERSION quast.py $(ls -d -1 "metabat/"*.fa | sed 's/^/\/data\//') -o /data/metabat/quast_metabat --silent
    # fi
    # fi
    if [ "$BUSCO" == "busco" ]; then
      mkdir -p metabat/tmp
      for f in metabat/*.fa; do
        [[ -e "$f" ]] || continue # glob matched nothing
        for BUSC in ${BUSCOS//,/ }; do
          echo "(binner) Run BUSCO $BUSC for $f"
          if [[ "$SINGULARITY" == "yes" ]]; then
            # Same approach as for the blobtools bins: augustus needs a writable
            # config folder, and the environment/working directory must be set
            # through singularity options ("export ... && cd ..." appended to
            # the command line would be executed by the host shell instead).
            if [ ! -d "$(pwd)/config" ]; then
              echo "(binner) Singularity specific Will copy augustus config path folder to writable location"
              singularity exec -B "$(pwd)":/data --env AUGUSTUS_CONFIG_PATH="/opt/conda/config" docker://reslp/busco:"$BUSCO_VERSION" cp -r /opt/conda/config /data/config
            fi
            singularity exec -B "$(pwd)":/data --pwd /data/metabat --env AUGUSTUS_CONFIG_PATH="/data/config" docker://reslp/busco:"$BUSCO_VERSION" run_busco -i "$(basename "$f")" --out "$BUSC"_"$(basename "$f")" -c "$THREADS" --lineage_path "/data/$BUSC" -m genome -q
          else
            # Working directory is /data/metabat; the version tag belongs to
            # the busco image, not to the -w path.
            docker run -t --rm $DOCKER_USER -v "$(pwd)":/data/ -e AUGUSTUS_CONFIG_PATH="/opt/conda/config" -w /data/metabat reslp/busco:"$BUSCO_VERSION" run_busco -i "$(basename "$f")" --out "$BUSC"_"$(basename "$f")" -c "$THREADS" --lineage_path ../"$BUSC" -m genome -q
          fi
        done
      done
    fi
  fi
fi
# --- CONCOCT --------------------------------------------------------------
# Runs when "concoct" is part of -m. An existing concoct/ directory makes the
# step a no-op unless --force was given. Bins end up as
# concoct/bins/concoct_*.fa.
# $SILENT and $DOCKER_USER are expanded unquoted on purpose.
if [[ $OPTIONS == *"concoct"* ]]; then
  echo "(binner) Will run concoct"
  if [[ -d concoct && $FORCE != "yes" ]]; then
    echo "(binner) CONCOCT output directory already exists. Will skip."
  else
    mkdir -p concoct
    if [[ "$SINGULARITY" == "yes" ]]; then
      echo "(binner) Digesting FASTA file ..."
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/concoct:"$CONCOCT_VERSION" cut_up_fasta.py /assembly/"$ASSEMBLY" -c 10000 -o 0 --merge_last -b /data/"$ASSEMBLY"_contigs_10K.bed > "$ASSEMBLY"_contigs_10K.fa
      echo "(binner) Creating coverage table ..."
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/concoct:"$CONCOCT_VERSION" concoct_coverage_table.py /data/"$ASSEMBLY"_contigs_10K.bed /data/"$ASSEMBLY".bam > concoct_coverage_table.tsv
      echo "(binner) running concoct ..."
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/concoct:"$CONCOCT_VERSION" concoct --composition_file /data/"$ASSEMBLY"_contigs_10K.fa --coverage_file /data/concoct_coverage_table.tsv -b /data/concoct/"$ASSEMBLY"_concoct --threads "$THREADS"
      echo "(binner) Merging results ..."
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/concoct:"$CONCOCT_VERSION" merge_cutup_clustering.py /data/concoct/"$ASSEMBLY"_concoct_clustering_gt1000.csv > concoct/"$ASSEMBLY"_concoct_clustering_merged.csv
      echo "(binner) Exract FASTA chunks ..."
      mkdir -p concoct/bins
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/concoct:"$CONCOCT_VERSION" extract_fasta_bins.py /assembly/"$ASSEMBLY" /data/concoct/"$ASSEMBLY"_concoct_clustering_merged.csv --output_path /data/concoct/bins
    else
      # No -t on the docker calls whose stdout is redirected into a file: a
      # pseudo-TTY would rewrite the line endings of the captured output.
      echo "(binner) Digesting FASTA file ..."
      docker run $DOCKER_USER -v "$(pwd)":/data/ -v "$ASSEMBLYPATH":/assembly/ --rm reslp/concoct:"$CONCOCT_VERSION" cut_up_fasta.py /assembly/"$ASSEMBLY" -c 10000 -o 0 --merge_last -b /data/"$ASSEMBLY"_contigs_10K.bed > "$ASSEMBLY"_contigs_10K.fa
      echo "(binner) Creating coverage table ..."
      docker run $DOCKER_USER -v "$(pwd)":/data/ --rm reslp/concoct:"$CONCOCT_VERSION" concoct_coverage_table.py /data/"$ASSEMBLY"_contigs_10K.bed /data/"$ASSEMBLY".bam > concoct_coverage_table.tsv
      echo "(binner) running concoct ..."
      docker run -t $DOCKER_USER -v "$(pwd)":/data/ --rm reslp/concoct:"$CONCOCT_VERSION" concoct --composition_file /data/"$ASSEMBLY"_contigs_10K.fa --coverage_file /data/concoct_coverage_table.tsv -b /data/concoct/"$ASSEMBLY"_concoct --threads "$THREADS"
      echo "(binner) Merging results ..."
      docker run $DOCKER_USER -v "$(pwd)":/data/ --rm reslp/concoct:"$CONCOCT_VERSION" merge_cutup_clustering.py /data/concoct/"$ASSEMBLY"_concoct_clustering_gt1000.csv > concoct/"$ASSEMBLY"_concoct_clustering_merged.csv
      echo "(binner) Exract FASTA chunks ..."
      mkdir -p concoct/bins
      docker run -t $DOCKER_USER -v "$(pwd)":/data/ -v "$ASSEMBLYPATH":/assembly/ --rm reslp/concoct:"$CONCOCT_VERSION" extract_fasta_bins.py /assembly/"$ASSEMBLY" /data/concoct/"$ASSEMBLY"_concoct_clustering_merged.csv --output_path /data/concoct/bins
    fi
    # Prefix every extracted bin with "concoct_". Done with plain mv for both
    # runtimes; this replaces the containerised `rename "s/^/concoct_/" *.fa`
    # and does not depend on the container's working directory. Files that
    # already carry the prefix (left over from an earlier run) are skipped.
    (
      cd concoct/bins || exit 1
      for fa in *.fa; do
        [[ -e "$fa" ]] || continue
        if [[ "$fa" == concoct_* ]]; then
          continue
        fi
        mv -- "$fa" "concoct_$fa"
      done
    )
    # Disabled per-binner QUAST run (kept for reference):
    # if [ "$QUAST" == "quast" ]; then
    # echo "(binner) Will run QUAST for CONCOT bins"
    # if [[ "$SINGULARITY" == "yes" ]]; then
    # singularity exec -B $(pwd):/data docker://reslp/quast:$QUAST_VERSION quast.py $(ls -d -1 "concoct/bins/"*.fa | sed 's/^/\/data\//') -o /data/concoct/quast_concoct --silent
    # else
    #
    # docker run --rm -t -v $(pwd):/data/ $DOCKER_USER reslp/quast:$QUAST_VERSION quast.py $(ls -d -1 "concoct/bins/"*.fa | sed 's/^/\/data\//') -o /data/concoct/quast_concoct --silent
    # fi
    # fi
    if [ "$BUSCO" == "busco" ]; then
      mkdir -p concoct/bins/tmp
      for f in concoct/bins/*.fa; do
        [[ -e "$f" ]] || continue # glob matched nothing
        for BUSC in ${BUSCOS//,/ }; do
          echo "(binner) Run BUSCO $BUSC for $f"
          if [[ "$SINGULARITY" == "yes" ]]; then
            # Same approach as for the blobtools bins: augustus needs a writable
            # config folder, and the environment/working directory must be set
            # through singularity options ("export ... && cd ..." appended to
            # the command line would be executed by the host shell instead).
            if [ ! -d "$(pwd)/config" ]; then
              echo "(binner) Singularity specific Will copy augustus config path folder to writable location"
              singularity exec -B "$(pwd)":/data --env AUGUSTUS_CONFIG_PATH="/opt/conda/config" docker://reslp/busco:"$BUSCO_VERSION" cp -r /opt/conda/config /data/config
            fi
            singularity exec -B "$(pwd)":/data --pwd /data/concoct/bins --env AUGUSTUS_CONFIG_PATH="/data/config" docker://reslp/busco:"$BUSCO_VERSION" run_busco -i "$(basename "$f")" --out "$BUSC"_"$(basename "$f")" -c "$THREADS" --lineage_path "/data/$BUSC" -m genome -q
          else
            docker run -t --rm $DOCKER_USER -v "$(pwd)":/data/ -e AUGUSTUS_CONFIG_PATH="/opt/conda/config" -w /data/concoct/bins reslp/busco:"$BUSCO_VERSION" run_busco -i "$(basename "$f")" --out "$BUSC"_"$(basename "$f")" -c "$THREADS" --lineage_path ../../"$BUSC" -m genome -q
          fi
        done
      done
    fi
  fi
fi
# --- blobtools ------------------------------------------------------------
# Runs when "blobtools" is part of -m. Needs --diamonddb and --protid.
# Each sub-step (diamond search, reformatting, blobtools itself) is skipped
# when its result file is already present in blobtools/.
# $SILENT and $DOCKER_USER are expanded unquoted on purpose.
if [[ $OPTIONS == *"blobtools"* ]]; then
  echo "(binner) Will prepare for blobtools"
  if [[ -z "${DIAMONDDB:-}" ]]; then
    echo "(binner) Error: Path to diamond db not set." >&2
    exit 1
  fi
  if [[ -z "${PROTID:-}" ]]; then
    echo "(binner) Error: Path to prot.accession2taxid not set." >&2
    exit 1
  fi
  if [[ ! -f "$PROTID" ]]; then
    echo "(binner) Error: $PROTID does not exist. Is the path correct?" >&2
    exit 1
  fi
  if [[ ! -f "${DIAMONDDB}.dmnd" ]]; then
    echo "(binner) Error: $DIAMONDDB.dmnd does not exist. Is the path correct?" >&2
    exit 1
  fi
  echo "(binner) location of diamond db: $DIAMONDDB"
  echo "(binner) location of prot.accession2taxid: $PROTID"
  mkdir -p blobtools
  if [[ ! -f blobtools/"$ASSEMBLY"_diamond_matches ]]; then
    echo "(binner) No diamond results found. Will therefore run diamond"
    if [[ "$SINGULARITY" == "yes" ]]; then
      singularity $SILENT exec -B "$(pwd)":/data -B "$(dirname "$DIAMONDDB")":/opt/database/ -B "$ASSEMBLYPATH":/assembly/ docker://reslp/diamond:"$DIAMOND_VERSION" diamond blastx -d /opt/database/"$(basename "$DIAMONDDB")" -q /assembly/"$ASSEMBLY" -o /data/blobtools/"$ASSEMBLY"_diamond_matches -p "$THREADS"
    else
      docker run -t $DOCKER_USER -v "$(pwd)":/data/ -v "$(dirname "$DIAMONDDB")":/opt/database/ -v "$ASSEMBLYPATH":/assembly/ --rm reslp/diamond:"$DIAMOND_VERSION" diamond blastx -d /opt/database/"$(basename "$DIAMONDDB")" -q /assembly/"$ASSEMBLY" -o /data/blobtools/"$ASSEMBLY"_diamond_matches -p "$THREADS"
    fi
  fi
  echo "(binner) reformatting diamond results for use with blobtools..."
  if [[ -f blobtools/"$ASSEMBLY"_diamond_matches_formatted ]]; then
    echo "(binner) File with reformatted diamond results already present, will skip."
  else
    # The mapping file is addressed by the basename of --protid inside the
    # container, so it does not have to be called "prot.accession2taxid".
    if [[ "$SINGULARITY" == "yes" ]]; then
      echo "$PROTID"
      singularity $SILENT exec -B "$(pwd)":/data -B "$(dirname "$PROTID")":/opt/mapping docker://reslp/get_taxids get_taxids.py -p /opt/mapping/"$(basename "$PROTID")" /data/blobtools/"$ASSEMBLY"_diamond_matches > blobtools/"$ASSEMBLY"_diamond_matches_formatted
      echo
    else
      docker run $DOCKER_USER -v "$(pwd)":/data/ -v "$(dirname "$PROTID")":/opt/mapping --rm reslp/get_taxids -p /opt/mapping/"$(basename "$PROTID")" /data/blobtools/"$ASSEMBLY"_diamond_matches > blobtools/"$ASSEMBLY"_diamond_matches_formatted
      echo
    fi
  fi
  if [[ -f blobtools/"$ASSEMBLY"_blobtools.blobDB.json ]]; then
    echo "(binner) blobtools results seem to be present. Will skip"
  else
    echo "(binner) Running blobtools"
    if [[ "$SINGULARITY" == "yes" ]]; then
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/blobtools:"$BLOBTOOLS_VERSION" blobtools create -i /assembly/"$ASSEMBLY" -b /data/"$ASSEMBLY".bam -t /data/blobtools/"$ASSEMBLY"_diamond_matches_formatted -o /data/blobtools/"$ASSEMBLY"_blobtools
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/blobtools:"$BLOBTOOLS_VERSION" blobtools view -i /data/blobtools/"$ASSEMBLY"_blobtools.blobDB.json -o /data/blobtools/
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/blobtools:"$BLOBTOOLS_VERSION" blobtools plot -i /data/blobtools/"$ASSEMBLY"_blobtools.blobDB.json -o /data/blobtools/
    else
      # NOTE(review): the docker calls pass only the sub-command; presumably
      # the image's entrypoint is the blobtools executable - confirm.
      docker run -t $DOCKER_USER -v "$(pwd)":/data/ -v "$ASSEMBLYPATH":/assembly/ --rm reslp/blobtools:"$BLOBTOOLS_VERSION" create -i /assembly/"$ASSEMBLY" -b /data/"$ASSEMBLY".bam -t /data/blobtools/"$ASSEMBLY"_diamond_matches_formatted -o /data/blobtools/"$ASSEMBLY"_blobtools
      docker run -t $DOCKER_USER -v "$(pwd)":/data/ --rm reslp/blobtools:"$BLOBTOOLS_VERSION" view -i /data/blobtools/"$ASSEMBLY"_blobtools.blobDB.json -o /data/blobtools/
      docker run -t $DOCKER_USER -v "$(pwd)":/data/ --rm reslp/blobtools:"$BLOBTOOLS_VERSION" plot -i /data/blobtools/"$ASSEMBLY"_blobtools.blobDB.json -o /data/blobtools/
    fi
    echo "(binner) extracting contigs from blobtools"
    # A temporary copy of the assembly is placed next to the blobtools table
    # for the extraction step and removed again afterwards.
    cp "$ASSEMBLYPATH"/"$ASSEMBLY" blobtools/
    cd blobtools || exit 1
    if [[ "$SINGULARITY" == "yes" ]]; then
      singularity $SILENT exec -B "$(pwd)":/data -B "$ASSEMBLYPATH":/assembly docker://reslp/extract_contigs /usr/bin/extract_contigs_from_blobtools.py /assembly/"$ASSEMBLY" /data/"$ASSEMBLY"_blobtools.blobDB.table.txt
    else
      docker run -t $DOCKER_USER -v "$(pwd)":/data/ -v "$ASSEMBLYPATH":/assembly/ --rm reslp/extract_contigs /assembly/"$ASSEMBLY" /data/"$ASSEMBLY"_blobtools.blobDB.table.txt
    fi
    rm -- "$ASSEMBLY"
    cd ..
  fi
  # Disabled per-binner QUAST run (kept for reference):
  # if [ "$QUAST" == "quast" ]; then
  # echo "(binner) Will run QUAST for blobtools bins"
  # if [[ "$SINGULARITY" == "yes" ]]; then
  # singularity exec -B $(pwd):/data docker://reslp/quast:$QUAST_VERSION quast.py $(ls -d -1 "blobtools/"*.fa | sed 's/^/\/data\//') -o /data/blobtools/quast_blobtools --silent
  # else
  # docker run --rm -t -v $(pwd):/data/ $DOCKER_USER reslp/quast:$QUAST_VERSION quast.py $(ls -d -1 "blobtools/"*.fa | sed 's/^/\/data\//') -o /data/blobtools/quast_blobtools --silent
  # fi
  # fi
  if [ "$BUSCO" == "busco" ]; then
    mkdir -p blobtools/tmp
    for f in blobtools/*.fa; do
      [[ -e "$f" ]] || continue # glob matched nothing
      for BUSC in ${BUSCOS//,/ }; do
        echo "(binner) Run BUSCO $BUSC for $f"
        if [[ "$SINGULARITY" == "yes" ]]; then
          if [ ! -d "$(pwd)/config" ]; then
            echo "(binner) Singularity specific Will copy augustus config path folder to writable location"
            singularity exec -B "$(pwd)":/data --env AUGUSTUS_CONFIG_PATH="/opt/conda/config" docker://reslp/busco:"$BUSCO_VERSION" cp -r /opt/conda/config /data/config
          fi
          # --pwd: the bin is passed by basename, so BUSCO has to start inside
          # blobtools/ (the docker path does the same with -w).
          singularity exec -B "$(pwd)":/data --pwd /data/blobtools --env AUGUSTUS_CONFIG_PATH="/data/config" docker://reslp/busco:"$BUSCO_VERSION" run_busco -i "$(basename "$f")" --out "$BUSC"_"$(basename "$f")" -c "$THREADS" --lineage_path "/data/$BUSC" -m genome -q
        else
          # Working directory is /data/blobtools; the version tag belongs to
          # the busco image, not to the -w path.
          docker run -t --rm $DOCKER_USER -v "$(pwd)":/data/ -e AUGUSTUS_CONFIG_PATH="/opt/conda/config" -w /data/blobtools reslp/busco:"$BUSCO_VERSION" run_busco -i "$(basename "$f")" --out "$BUSC"_"$(basename "$f")" -c "$THREADS" --lineage_path ../"$BUSC" -m genome -q
        fi
      done
    done
  fi
fi
#######################################
# Collect the bin FASTA files of all binners that produced output.
# Looks for maxbin/*.fasta, metabat/*.fa, concoct/bins/*.fa and blobtools/*.fa
# relative to the current directory; binners that were not run are skipped
# silently.
# Globals:   QUAST_FILES (written) - array of the files found, each prefixed
#            with /data/ (the mount point of the current directory inside
#            the QUAST container)
# Returns:   0
#######################################
collect_quast_inputs() {
  local bin
  QUAST_FILES=()
  for bin in maxbin/*.fasta metabat/*.fa concoct/bins/*.fa blobtools/*.fa; do
    # An unmatched pattern stays literal and does not exist as a file.
    [[ -e "$bin" ]] || continue
    QUAST_FILES+=("/data/$bin")
  done
  return 0
}

if [ "$QUAST" == "quast" ]; then
  echo "(binner) Will run QUAST on all binning results"
  collect_quast_inputs
  if [[ ${#QUAST_FILES[@]} -eq 0 ]]; then
    echo "(binner) Warning: no bins found, will skip QUAST." >&2
  elif [[ "$SINGULARITY" == "yes" ]]; then
    singularity $SILENT exec -B "$(pwd)":/data docker://reslp/quast:"$QUAST_VERSION" quast.py "${QUAST_FILES[@]}" -o /data/quast --silent
  else
    # Same output location as the singularity path (the combined report does
    # not belong into maxbin/). $DOCKER_USER is expanded unquoted on purpose.
    docker run --rm -t $DOCKER_USER -v "$(pwd)":/data/ reslp/quast:"$QUAST_VERSION" quast.py "${QUAST_FILES[@]}" -o /data/quast --silent
  fi
fi
# Optional MultiQC summary, requested with --multiqc. The container is
# chosen by the selected runtime.
if [ "$MULTIQC" == "multiqc" ]; then
  echo "(binner) --multiqc specified. Will run multiQC now. Will only report something when quast and busco have been run."
  case "$SINGULARITY" in
    yes)
      singularity exec docker://reslp/multiqc:$MULTIQC_VERSION multiqc .
      ;;
    *)
      # NOTE(review): no command is passed to the docker image; presumably
      # its default command runs multiqc in the working directory - confirm.
      docker run -t --rm $DOCKER_USER -v $(pwd):/data/ -w /data/ reslp/multiqc:$MULTIQC_VERSION
      ;;
  esac
fi