Skip to content

Commit 683f928

Browse files
committed
STARE v0.1
1 parent 7b40f3f commit 683f928

91 files changed

Lines changed: 178455 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Code/JASPAR_to_PSCM.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import sys
2+
import argparse
3+
import os
4+
5+
#This script converts PWMs in JASPAR format to PWMs in TRANSFAC format, so that they can be converted to the TRAP energy format
6+
#arg1 File containing the pwms in Jaspar format
7+
8+
def createHeader(outfile,name,database):
    """Write the header of one matrix entry to ``outfile``.

    The header consists of the record separator ``//``, an ``XX`` spacer,
    the ``ID`` line (database identifier, a tab, then the factor name),
    another ``XX`` spacer and the tab-separated ``P0 A C G T`` column header.

    outfile  -- object with a ``write`` method that accepts strings
    name     -- name of the factor, written after the tab on the ID line
    database -- identifier written directly after ``ID ``
    """
    header_lines = (
        "//",
        "XX",
        "ID " + database + "\t" + name,
        "XX",
        "P0\tA\tC\tG\tT",
    )
    # One write per header line, each terminated by a newline.
    for header_line in header_lines:
        outfile.write(header_line + "\n")
14+
15+
def storePreviousFactor(outfile,scores):
    """Write the collected counts of one matrix to ``outfile``, one row per position.

    ``scores`` is a flat list of strings holding four equally long runs of
    values, one run after the other. Row ``i`` of the output contains the
    1-based position followed by the ``i``-th value of each of the four runs,
    tab-separated. A closing ``XX`` line is always written, even when
    ``scores`` is empty.

    outfile -- object with a ``write`` method that accepts strings
    scores  -- list of strings; surplus entries beyond a multiple of four are
               ignored because the motif length is rounded down
    """
    # Floor division: a plain "/" yields a float on Python 3, which range()
    # and list indexing both reject.
    motivelength = len(scores) // 4
    for position in range(motivelength):
        row = [str(position + 1)]
        for run in range(4):
            row.append(scores[motivelength * run + position])
        outfile.write("\t".join(row) + "\n")
    outfile.write("XX\n")
23+
24+
def main():
    """Convert the matrix file given on the command line.

    Reads the file named by the single positional argument and writes the
    converted matrices to ``<input path>.PSCM``. Every line containing ``>``
    starts a new matrix: its first whitespace-separated token (with ``>``
    removed) is used as the database identifier and the second token as the
    name. All other lines contribute their whitespace-separated tokens to the
    scores of the current matrix.
    """
    # prog matches this script's file name so the usage message is correct.
    parser=argparse.ArgumentParser(prog="JASPAR_to_PSCM.py")
    parser.add_argument("jaspar",nargs=1,help="File containing the pwms in JASPAR format")
    args=parser.parse_args()
    jaspar_path=args.jaspar[0]
    # "with" closes both files even if a malformed input raises midway.
    with open(jaspar_path,"r") as jasparFormat, open(jaspar_path+".PSCM","w") as PSCMFormat:
        scores=[]
        for l in jasparFormat:
            if ">" in l:
                # Flush the matrix collected so far before starting the next one.
                if scores!=[]:
                    storePreviousFactor(PSCMFormat,scores)
                fields=l.split()
                database=fields[0].replace(">","")
                # A header carrying only an identifier has no second token;
                # reuse the identifier as the name instead of raising IndexError.
                name=fields[1] if len(fields)>1 else database
                createHeader(PSCMFormat,name,database)
                scores=[]
            else:
                scores+=l.split()
        # Write the final matrix (or just the closing XX for an empty input).
        storePreviousFactor(PSCMFormat,scores)
44+
45+
46+
main()

Code/Juicebox_KR_normalization.sh

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/bin/bash

# Part of STARE: https://github.com/SchulzLab/STARE
# Small bash script to ease the normalization of .hic-files with Juicebox (https://github.com/aidenlab/juicer/wiki/Data-Extraction)

# Abort on the first failing command. This is set here instead of in the
# shebang so that it also applies when the script is started as
# `bash Juicebox_KR_normalization.sh ...`, where shebang options are ignored.
set -e

# Usage text. It contains literal "\n" sequences, so it has to be printed with
# escape interpretation (e.g. `echo -e` or `printf '%b'`).
help="\n
Usage: ./Juicebox_KR_normalization.sh [-h hic-file to normalize]\n[-j path to the jar-file]\n
[-d folder to write the normalized files to]\n
optional:\n
[-c the chromosomes to normalize, e.g. 1-22+XY, default is 1-22, also possible to query individual chromosomes or multiple ones comma separated (1,5,7)]\n
[-b bin size, meaning resolution of the matrix (default 5000)]\n"
12+
13+
# ------------------------------------------------------------------------------------------------------
# FETCHING INPUT
# ------------------------------------------------------------------------------------------------------
hic_file=""
out_folder=""
jar_file=""
chromosomes="1-22"
bin_size=5000

#######################################
# Parse the command line into the global option variables.
# Globals:   hic_file, out_folder, jar_file, chromosomes, bin_size (written)
#            help (read, printed on usage errors if it is set)
# Arguments: the command-line arguments of the script
# Outputs:   error message and usage text on stderr for invalid input
# Returns:   0 on success; exits with status 1 on an unknown option or an
#            option that is missing its argument
#######################################
parse_args() {
  # Local OPTIND so that parsing always starts at the first argument.
  local OPTIND=1
  local o
  # Leading ':' enables silent mode so errors are reported below.
  # 'b:' was missing before, which made the documented -b option unusable.
  while getopts ":h:d:j:c:b:" o; do
    case "$o" in
      h) hic_file=$OPTARG ;;
      d) out_folder=$OPTARG ;;
      j) jar_file=$OPTARG ;;
      c) chromosomes=$OPTARG ;;
      b) bin_size=$OPTARG ;;
      :)
        printf 'Option -%s requires an argument\n' "$OPTARG" >&2
        printf '%b\n' "${help:-}" >&2
        exit 1
        ;;
      *)
        printf 'Unknown option -%s\n' "$OPTARG" >&2
        printf '%b\n' "${help:-}" >&2
        exit 1
        ;;
    esac
  done
}

# Parsing command line.
parse_args "$@"
33+
34+
# The Hi-C file, the output folder and the jar file are all mandatory.
# Report the first one that is missing and stop with exit status 1.
[[ -n "$hic_file" ]] || {
  echo "Hi-C file must be specified with -h"
  exit 1
}

[[ -n "$out_folder" ]] || {
  echo "An output folder must be specified with -d"
  exit 1
}

[[ -n "$jar_file" ]] || {
  echo "The jar file is needed to call Juicebox"
  exit 1
}
51+
52+
# Create the out_dir if not existent and call Juicebox for each of the specified chromosomes.
if [[ ! -d "$out_folder" ]]; then
  # -p so that nested output folders can be created in one go.
  mkdir -p -- "$out_folder"
fi

#######################################
# Expand a chromosome specification into one chromosome name per line.
# Supported forms:
#   range, optionally followed by gonosomes:  1-22, 1-22XY, 1-22+XY
#   comma-separated list:                     1,5,7
#   single chromosome:                        X
# An empty specification produces no output.
# Arguments: $1 - the chromosome specification
# Outputs:   chromosome names (without "chr" prefix) on stdout
# Returns:   0 on success, 1 if a range does not have numeric bounds
#######################################
expand_chromosomes() {
  local spec=$1
  local range first last i
  local -a bounds items

  if [[ -z "$spec" ]]; then
    return 0
  fi

  if [[ "$spec" == *-* ]]; then
    # Drop the gonosome letters and the '+' joining them to the range;
    # leaving the '+' behind made the documented "1-22+XY" an invalid range.
    range=${spec//[XY+]/}
    IFS='-' read -r -a bounds <<< "$range"
    first=${bounds[0]:-}
    last=${bounds[1]:-}
    if [[ ! "$first" =~ ^[0-9]+$ || ! "$last" =~ ^[0-9]+$ ]]; then
      printf 'Invalid chromosome range: %s\n' "$spec" >&2
      return 1
    fi
    # 10# forces base 10 so bounds such as "08" are not read as octal.
    for (( i = 10#$first; i <= 10#$last; i++ )); do
      printf '%s\n' "$i"
    done
    # Manually check if any gonosome was selected in addition to the range.
    if [[ "$spec" == *X* ]]; then
      printf 'X\n'
    fi
    if [[ "$spec" == *Y* ]]; then
      printf 'Y\n'
    fi
  elif [[ "$spec" == *,* ]]; then
    IFS=',' read -r -a items <<< "$spec"
    printf '%s\n' "${items[@]}"
  else
    # If only one individual chromosome was queried.
    printf '%s\n' "$spec"
  fi
}

#######################################
# Dump the KR-normalized observed contacts of one chromosome and gzip them.
# Globals:   jar_file, hic_file, bin_size, out_folder (read)
# Arguments: $1 - chromosome name without the "chr" prefix
# Outputs:   writes <out_folder>/chr<name>_KR_Contacts.txt.gz
# Returns:   non-zero if the java call or gzip fails
#######################################
dump_chromosome() {
  local chr=$1
  local contact_file="${out_folder}/chr${chr}_KR_Contacts.txt"
  java -jar "$jar_file" dump observed KR "$hic_file" \
    "chr${chr}" "chr${chr}" BP "$bin_size" "$contact_file" || return
  gzip -- "$contact_file"
}

chromosome_list=$(expand_chromosomes "$chromosomes") || exit 1
# Read the list on file descriptor 3 so the commands in the loop keep stdin.
while IFS= read -r chr <&3; do
  [[ -n "$chr" ]] || continue
  dump_chromosome "$chr" || exit 1
done 3<<< "$chromosome_list"
83+

0 commit comments

Comments
 (0)