-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathWhatsHapPhase.snakefile
More file actions
113 lines (96 loc) · 2.91 KB
/
WhatsHapPhase.snakefile
File metadata and controls
113 lines (96 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import tempfile
#
# Locate the tempdir for grid processing
#
configfile: "phase.json"
if "TMPDIR" in os.environ:
TMPDIR = os.environ['TMPDIR']
elif "TMPDIR" in config:
TMPDIR = config['TMPDIR']
else:
TMPDIR = tempfile.gettempdir()
SD = os.path.dirname(workflow.snakefile)
faiFile = open(config["ref"] +".fai")
chroms = [l.split()[0] for l in faiFile ]
wd="/staging/mjc/mchaisso/phasing"
sample=config["sample"]
shell.prefix("set +eu ")
rule all:
input:
calledVCF=sample+".unphased.vcf",
indexedVcf=sample+".wh.vcf.gz"
rule CallVariantsByChrom:
input:
srBam=config["srbam"]
output:
calledChrom=sample+".unphased.{chrom}.vcf"
resources:
mem_gb=48,
threads=4
params:
grid_opts="sbatch -c 4 --mem=48G --time=24:00:00 -p cmb"
shell:"""
which java
/home/cmb-16/mjc/shared/software_packages/gatk-4.0.7.0/gatk HaplotypeCaller --input {input.srBam} --output {output.calledChrom} -L {wildcards.chrom} --reference /home/cmb-16/mjc/shared/references/hg38_noalts/hg38.no_alts.fasta
"""
rule CombineUnphasedByChrom:
input:
chroms=expand(sample+".unphased.{chrom}.vcf", chrom=chroms)
output:
phasedGenome=sample+".unphased.vcf"
params:
grid_opts="sbatch -c 1 --mem=2G --time=1:00:00 -p cmb"
resources:
mem_gb=4,
threads=1
shell:"""
grep "^#" {input.chroms[0]} | grep -v "CHROM" > {output.phasedGenome}
echo "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tunknown" >> {output.phasedGenome}
cat {input.chroms} | grep -v "^#" >> {output.phasedGenome}
"""
rule PhaseByChromosome:
input:
lrBam=config["bam"],
srVCF=sample+".unphased.vcf"
output:
phasedChromosome=sample+".phased.{chrom}.vcf"
resources:
mem_gb=16,
threads=1
params:
ref=config["ref"],
grid_opts="sbatch -c 1 --mem=24G --time=12:00:00 -p cmb"
shell:"""
which whatshap
whatshap phase {input.srVCF} {input.lrBam} --ignore-read-groups --chromosome {wildcards.chrom} --reference {params.ref} -o /dev/stdout | egrep -w "^#|^{wildcards.chrom}" > {output.phasedChromosome}
"""
rule CombinePhasedChromosomes:
input:
chroms=expand(sample+".phased.{chrom}.vcf", chrom=chroms)
output:
phasedGenome=sample+".wh.vcf"
params:
grid_opts="sbatch -c 1 --mem=2G --time=1:00:00 -p cmb"
resources:
mem_gb=4,
threads=1
shell:"""
grep "^#" {input.chroms[0]} > {output.phasedGenome}
echo "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tunknown" >> {output.phasedGenome}
cat {input.chroms} | grep -v "^#" >> {output.phasedGenome}
"""
rule IndexVCF:
input:
vcf="{input}.wh.vcf"
output:
gzvcf="{input}.wh.vcf.gz"
params:
grid_opts="sbatch -c 1 --mem=4G --time=1:00:00 -p cmb"
resources:
mem_gb=4,
threads=1
shell:"""
bgzip {input.vcf}
tabix {input.vcf}.gz
"""