-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrun_pi_odin.sh
More file actions
executable file
·203 lines (142 loc) · 7.23 KB
/
run_pi_odin.sh
File metadata and controls
executable file
·203 lines (142 loc) · 7.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# run_pi_odin.sh -- run ODIN in six configurations on one sample set and
# record run time, memory usage and reformatted differential peaks.
#
# Requires bash (uses `source`, `local`, arrays and `compgen`).
#
# Usage:
#   run_pi_odin.sh NAME SET TOOL S1_A S1_B INPUT_1 S2_A S2_B INPUT_2
#
#   $1 NAME     sample name; a trailing "_sample1-rep1_mm" is stripped to
#               form the output prefix (OUT_NAME)
#   $2 SET      data set label; results are written to results/TOOL/SET/
#   $3 TOOL     tool label; the log is written to log/TOOL/SET.log
#   $4 $5       alignment files merged into s1.bam with `samtools merge`
#   $6          passed to ODIN as --input-1
#   $7 $8       alignment files merged into s2.bam with `samtools merge`
#   $9          passed to ODIN as --input-2
#
# NOTE: $4..$9 are used only after the script has changed into
# results/TOOL/SET/, so relative paths are resolved against that directory.
#
# Files left in results/TOOL/SET/ (i = 1..6, see the variant list below):
#   time.txt         "prep <seconds>" followed by one "<i> <seconds>" per run
#   memory.txt       one "<i> <%K> <%M>" line per run, as reported by
#                    /usr/bin/time (GNU time: %K = average total memory use,
#                    %M = maximum resident set size, both in KB)
#   <OUT_NAME>_<i>.bed  reformatted differential peaks (empty if ODIN
#                    produced no <OUT_NAME>-diffpeaks.bed)
#
# Nothing is done if results/TOOL/SET/ already contains any *.bed file.
#
# `set -e` is deliberately NOT used: a failing ODIN run is tolerated and
# results in an empty <OUT_NAME>_<i>.bed.

NAME=$1
SET=$2
TOOL=$3
# Positional parameters are not visible inside functions, so the two control
# arguments handed to ODIN are kept in globals.
INPUT_1=$6
INPUT_2=$9

ODIN_PATH="/proj/chipseq_norm_diffbind_062017/analysis/03_db_analysis/python-virtual-environments/reg-gen-ODIN-0.4.1-release/rgt/ODIN/ODIN.py"

#######################################
# Reformat an ODIN diffpeaks BED file for evaluation.
# Column 11 is split on commas into a[1], a[2], a[3]; the output has the
# five tab-separated columns: $1, $2, $3, a[3], log2(a[1]/a[2]) ("inf" when
# a[2] is 0). Rows are sorted by column 1, then numerically by column 2.
# Arguments: $1 - path of the diffpeaks BED file
# Outputs:   reformatted rows on stdout
#######################################
reformat_diffpeaks() {
  # The file is fed via stdin so that a name containing "=" can never be
  # mistaken by awk for a variable assignment.
  awk '{split($11,a,","); lfold=((a[2]==0) ? "inf" : log(a[1]/a[2])/log(2)); print $1"\t"$2"\t"$3"\t"a[3]"\t"lfold}' < "$1" \
    | sort -k1,1 -k2,2n
}

#######################################
# Run ODIN once, record its run time and memory usage, reformat its output
# and remove ODIN's intermediate files. Must be called from inside the
# result directory, after s1.bam, s2.bam and the chr19_plus.* files exist.
# Globals:   ODIN_PATH, OUT_NAME, LOG, INPUT_1, INPUT_2 (all read)
# Arguments: $1 - variant index, used as row label and output file suffix
#            $2 - bin size passed to ODIN as -b
#            $3 - optional value for --dist; when empty or omitted the
#                 flag is not passed at all
# Outputs:   appends to time.txt, memory.txt and $LOG;
#            writes ${OUT_NAME}_<index>.bed
#######################################
run_odin_variant() {
  local idx=$1
  local bin_size=$2
  local dist=${3:-}
  local started elapsed mem_usage
  local diffpeaks="${OUT_NAME}-diffpeaks.bed"
  local -a odin_args=(-n "$OUT_NAME")

  if [[ -n "$dist" ]]; then
    odin_args+=(--dist "$dist")
  fi
  odin_args+=(-f 0.7 -b "$bin_size" -p 1.0
    "--input-1=$INPUT_1" "--input-2=$INPUT_2"
    s1.bam s2.bam chr19_plus.fa chr19_plus.chrom.size)

  started=$(date +%s.%N)
  # /usr/bin/time writes its own report to mem.txt (-o); ODIN's stdout and
  # stderr go to the shared log.
  /usr/bin/time -o mem.txt -f "%K %M" python "$ODIN_PATH" "${odin_args[@]}" >> "$LOG" 2>&1
  elapsed=$(echo "$(date +%s.%N) - $started" | bc)
  echo "$idx $elapsed" >> time.txt

  # When the command fails, time prepends a "... non-zero status ..." line
  # to its report; drop it so only the "%K %M" figures remain.
  mem_usage=$(sed '/non-zero status/d' mem.txt)
  echo "$idx $mem_usage" >> memory.txt

  if [[ -e "$diffpeaks" ]]; then
    reformat_diffpeaks "$diffpeaks" > "${OUT_NAME}_${idx}.bed"
  else
    # ODIN produced nothing: leave an empty result so the variant still
    # counts as done.
    touch -- "${OUT_NAME}_${idx}.bed"
  fi

  # Remove ODIN's intermediate output so the next variant starts clean.
  rm -f -- ./*.bw ./*.info mem.txt
  rm -f -- "$diffpeaks" "${OUT_NAME}-diffpeaks.narrowPeak" \
    "${OUT_NAME}-uncor-diffpeaks.bed" "${OUT_NAME}-uncor-diffpeaks.narrowPeak"
}

mkdir -p "results/$TOOL" "results/$TOOL/$SET"
mkdir -p "log/$TOOL"

# compgen -G succeeds only if the glob matches at least one path; this
# replaces counting the lines of `ls` output.
if compgen -G "results/$TOOL/$SET/*.bed" > /dev/null; then
  echo "results/$TOOL/$SET/bed already exists exiting..."
else
  echo "running $TOOL with: $1 $2 $3"
  OUT_NAME=$(basename -- "$NAME" _sample1-rep1_mm)
  # Relative to results/TOOL/SET/, i.e. this is log/TOOL/SET.log.
  LOG="../../../log/$TOOL/$SET.log"

  # Everything below writes and deletes files relative to the current
  # directory (including glob deletions), so never continue in the wrong one.
  cd "results/$TOOL/$SET/" || {
    echo "cannot cd to results/$TOOL/$SET/" >&2
    exit 1
  }

  # memory.txt is only ever appended to below; start from an empty file so
  # a rerun does not pile up rows from an earlier attempt (time.txt is
  # likewise started fresh).
  : > memory.txt

  prep_started=$(date +%s.%N)

  # -f: overwrite merged BAMs left behind by an interrupted earlier run
  # instead of failing and silently reusing the stale file.
  samtools merge -f s1.bam "$4" "$5"
  samtools merge -f s2.bam "$7" "$8"
  samtools index s1.bam
  samtools index s2.bam

  # Build a reference consisting of mm10 chr19 plus dm6 chr2L, with the
  # latter renamed to dm_chr2L.
  {
    cat /ssd/references/THOR/mm10/chr19.fa
    sed "s/chr2L/dm_chr2L/" /proj/chipseq_norm_diffbind_062017/analysis/01_simulating/data/dm6_chr2L.fasta
  } > chr19_plus.fa
  {
    cat /ssd/references/THOR/mm10/chr19.chrom.size
    printf 'dm_chr2L\t23513712\n'
  } > chr19_plus.chrom.size

  # Activate the virtual environment for ODIN.
  source /proj/chipseq_norm_diffbind_062017/analysis/03_db_analysis/python-virtual-environments/odin_env/bin/activate

  prep_done=$(date +%s.%N)
  echo "prep $(echo "$prep_done - $prep_started" | bc)" > time.txt

  # Variants; the index is the row label in time.txt / memory.txt and the
  # suffix of the resulting bed file. Runs without --dist are the ones the
  # original comments label "binomial".
  run_odin_variant 1 100             # binomial,  100 bp bins
  run_odin_variant 2 100 poisson     # poisson,   100 bp bins
  run_odin_variant 3 100 poisson-c   # poisson-c, 100 bp bins
  run_odin_variant 4 1000            # binomial,  1000 bp bins
  run_odin_variant 5 1000 poisson    # poisson,   1000 bp bins
  run_odin_variant 6 1000 poisson-c  # poisson-c, 1000 bp bins

  # Leave the virtual environment.
  deactivate

  # Remove the temporary reference and merged alignment files.
  rm -f -- chr19_plus.fa chr19_plus.fa.fai chr19_plus.chrom.size
  rm -rf -- s1.bam s2.bam s1.bam.bai s2.bam.bai
fi