#!/bin/bash

# Part of STARE: https://github.com/SchulzLab/STARE
# Small bash script to ease the normalization of .hic-files with Juicebox
# (https://github.com/aidenlab/juicer/wiki/Data-Extraction).

# Abort on the first failing command. This is set here instead of in the
# shebang so that it also applies when the script is started as `bash <script>`.
set -e

help="
Usage: ./Juicebox_KR_normalization.sh
[-h hic-file to normalize]
[-j path to the jar-file]
[-d folder to write the normalized files to]
optional:
[-c the chromosomes to normalize, e.g. 1-22+XY, default is 1-22, also possible to query individual chromosomes or multiple ones comma separated (1,5,7)]
[-b bin size, meaning resolution of the matrix (default 5000)]
"

# ------------------------------------------------------------------------------------------------------
# FETCHING INPUT
# ------------------------------------------------------------------------------------------------------
hic_file=""
out_folder=""
jar_file=""
chromosomes="1-22"
bin_size=5000

# Print the usage text to stderr.
usage() {
    printf '%s\n' "$help" >&2
}

# Parse the command line into the option variables declared above.
# Globals:   hic_file, out_folder, jar_file, chromosomes, bin_size (written)
# Arguments: the command-line arguments of the script
# Returns:   exits with status 1 on an unknown option or a missing option value
parse_args() {
    local opt
    # Local OPTIND so that the function can be called more than once.
    local OPTIND=1 OPTARG
    # The leading ':' switches getopts to silent mode, so that the two error
    # cases below can be reported together with the usage text.
    while getopts ":h:d:j:c:b:" opt; do
        case "$opt" in
            h) hic_file=$OPTARG ;;
            d) out_folder=$OPTARG ;;
            j) jar_file=$OPTARG ;;
            c) chromosomes=$OPTARG ;;
            b) bin_size=$OPTARG ;;
            :)
                printf 'Option -%s requires an argument\n' "$OPTARG" >&2
                usage
                exit 1
                ;;
            \?)
                printf 'Unknown option -%s\n' "$OPTARG" >&2
                usage
                exit 1
                ;;
        esac
    done
}

# Parsing command line.
parse_args "$@"

# Report a fatal error on stderr and abort the script with status 1.
# Arguments: $* - the message to print
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

# The hic-file, the output folder and the jar-file are mandatory; stop early
# with a hint if one of them was not given.
[[ -n "$hic_file" ]] || die "Hi-C file must be specified with -h"
[[ -n "$out_folder" ]] || die "An output folder must be specified with -d"
[[ -n "$jar_file" ]] || die "The jar file is needed to call Juicebox"

# Create the out_dir if not existent. -p also creates missing parent folders
# and does not fail when the folder is already there.
mkdir -p -- "$out_folder"

# Expand a chromosome selection into one chromosome name per line.
# Supported forms: a numeric range with optional gonosomes ("1-22", "1-22+XY"),
# a comma-separated list ("1,5,7") or a single chromosome ("X").
# Arguments: $1 - the chromosome selection
# Outputs:   chromosome names without the "chr" prefix on stdout, one per line;
#            nothing for an empty selection
# Returns:   1 if a selection containing '-' is not of the form <start>-<end>
expand_chromosomes() {
    local spec=$1
    local range first last i
    local range_re='^([0-9]+)-([0-9]+)$'
    local -a names=()

    if [[ -z "$spec" ]]; then
        return 0
    fi

    if [[ "$spec" == *-* ]]; then
        # Drop the gonosome letters and the '+' that joins them to the range,
        # so that "1-22+XY" leaves the plain numeric range "1-22".
        range=${spec//[XY+]/}
        if [[ ! "$range" =~ $range_re ]]; then
            printf 'Invalid chromosome range: %s\n' "$spec" >&2
            return 1
        fi
        # 10# forces base 10, so that "08" is not read as an octal number.
        first=$((10#${BASH_REMATCH[1]}))
        last=$((10#${BASH_REMATCH[2]}))
        for ((i = first; i <= last; i++)); do
            printf '%s\n' "$i"
        done
        # Manually check if any gonosome was selected in addition to the range.
        if [[ "$spec" == *X* ]]; then
            printf 'X\n'
        fi
        if [[ "$spec" == *Y* ]]; then
            printf 'Y\n'
        fi
    elif [[ "$spec" == *,* ]]; then
        IFS=',' read -ra names <<< "$spec"
        printf '%s\n' "${names[@]}"
    else
        # If only one individual chromosome was queried.
        printf '%s\n' "$spec"
    fi
}

# Call Juicebox `dump observed KR` for one chromosome against itself at the
# configured bin size and gzip the resulting text file.
# Globals:   jar_file, hic_file, bin_size, out_folder (read)
# Arguments: $1 - chromosome name without the "chr" prefix, e.g. 7 or X
# Returns:   the status of the failing command, 0 on success
dump_chromosome() {
    local chr=$1
    local out_file="${out_folder}/chr${chr}_KR_Contacts.txt"

    java -jar "$jar_file" dump observed KR "$hic_file" "chr${chr}" "chr${chr}" \
        BP "$bin_size" "$out_file" || return
    # -f replaces the .gz of an earlier run instead of failing on it.
    gzip -f -- "$out_file"
}

# Call Juicebox for each of the specified chromosomes.
chr_names=$(expand_chromosomes "$chromosomes") || exit
if [[ -z "$chr_names" ]]; then
    printf 'No chromosomes selected, nothing to normalize\n' >&2
fi
# The list is read from file descriptor 3 so that java and gzip keep the
# regular stdin of the script.
while IFS= read -r chr <&3; do
    # Skip empty entries, e.g. from a doubled comma in the selection.
    [[ -n "$chr" ]] || continue
    dump_chromosome "$chr" || exit
done 3<<< "$chr_names"