-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path3d-preprocess.sh
More file actions
134 lines (95 loc) · 3.82 KB
/
3d-preprocess.sh
File metadata and controls
134 lines (95 loc) · 3.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
#SBATCH --account=
#SBATCH --nodes=
#SBATCH --ntasks=
#SBATCH --mem=
#SBATCH --qos=
#SBATCH --partition=
#SBATCH --time=13:49:59
############################
#SBATCH --job-name=chrom3d-preprocess
#SBATCH --output=%j-chrom3d-preprocess.out
############################
#
# Chrom3D preprocessing driver.
#
# Converts HiC-Pro contact matrices (50kb and 1mb) plus a TAD BED file into a
# diploid GTrack model-setup file by calling the helper scripts under ${PPS}.
# All intermediate and final files are written relative to the current
# working directory.
#
# Configuration (edit below):
#   PPS, GENOME_SIZE, GENOME_BUILD, SAMPLE, matrices
# Required environment:
#   BLACKLIST - blacklist file passed to interchr_NCHG_input_auto.sh
# External tools used: python, curl, gunzip, grep, pairToBed, and the NCHG
# binary at ${PPS}/NCHG_hic/NCHG.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails, so a broken step cannot silently feed empty
# or partial data into the next one.
set -euo pipefail

PPS="chrom3D/preprocess_scripts" # from chrom 3D pipeline
GENOME_SIZE="hg38.genome"
# Genome build used to pick the cytoband table; must agree with GENOME_SIZE
# and the *.grch38.bed bin files below.
GENOME_BUILD="hg38"
SAMPLE="samplename"
matrices="pathtohicmatrices"

matrix_50kb="${matrices}/${SAMPLE}.50kb.hicpro"
matrix_1mb="${matrices}/${SAMPLE}.1mb.hicpro"
abs_50kb="${matrices}/50kb_abs_chr.grch38.bed"
abs_1mb="${matrices}/1mb_abs_chr.grch38.bed"
tad="${SAMPLE}/tads_sorted.bed3"

# The inter-chromosomal step needs a blacklist argument. It was previously
# referenced without ever being defined, which silently shifted "1mb" into
# its position. Fail fast, before any long-running work starts.
: "${BLACKLIST:?BLACKLIST must be set to the blacklist file for interchr_NCHG_input_auto.sh}"

# Centromere ("acen") bands must come from the same build as the Hi-C data.
CYTOBAND_URL="http://hgdownload.cse.ucsc.edu/goldenPath/${GENOME_BUILD}/database/cytoBand.txt.gz"

# Work on a fixed-name local copy of the TAD file.
# NOTE(review): later steps pass the literal prefix "tad", which presumably
# refers to this file name - confirm against intrachr_NCHG_input_auto.sh.
cp -- "$tad" tad.bed

echo "convert hic-pro output"

echo "create intra-chromosomal contact matrices"
python "${PPS}/conv_hicpro_mat.py" \
  "$matrix_50kb" \
  "$abs_50kb" \
  > "${SAMPLE}_50000.intermediate.bedpe"

mkdir -p intra_chr_RAWobserved
bash "${PPS}/make_intrachr_rawObserved.sh" \
  "${GENOME_SIZE}" \
  "${SAMPLE}_50000.intermediate.bedpe"

echo "create inter-chromosomal contact matrices"
python "${PPS}/conv_hicpro_mat.py" \
  "$matrix_1mb" \
  "$abs_1mb" \
  > "${SAMPLE}_1000000.intermediate.bedpe"

mkdir -p inter_chr_RAWobserved
bash "${PPS}/make_interchr_rawObserved.sh" \
  "${GENOME_SIZE}" \
  "${SAMPLE}_1000000.intermediate.bedpe"

echo "TAD to domains, input is merged arrowhead out"
bash "${PPS}/arrowhead_to_domains.sh" \
  tad.bed \
  "${GENOME_SIZE}"

echo "Concatenate all the .domains to use in a later step"
# The glob is intentionally unquoted: it collects the per-chromosome
# *.chr*.domains files found in the working directory.
cat -- *.chr*.domains > "${SAMPLE}_domainlist.domains"

echo "Compute intra-chromosomal interaction counts between TADs"
mkdir -p intrachr_bedpe
bash "${PPS}/intrachr_NCHG_input_auto.sh" \
  tad \
  "${GENOME_SIZE}" \
  50kb

echo "Concatenate all intra-chromosomal interaction counts"
cat -- intrachr_bedpe/chr*.bedpe > "intrachr_bedpe/${SAMPLE}_50kb.domain.RAW.bedpe"

echo "Remove domains that contain centromeres from the BEDPE file"
# -f makes curl fail on HTTP errors instead of piping an error page onward;
# -type neither keeps only pairs where neither end overlaps a centromere band.
curl -fsSL "${CYTOBAND_URL}" \
  | gunzip -c \
  | grep acen \
  | pairToBed \
      -a "intrachr_bedpe/${SAMPLE}_50kb.domain.RAW.bedpe" \
      -b stdin -type neither \
  > "intrachr_bedpe/${SAMPLE}_50kb.domain.RAW.no_cen.bedpe"

echo "Calculate the P-value and odds ratio for each pair of TADs"
"${PPS}/NCHG_hic/NCHG" -m 50000 \
  -p "intrachr_bedpe/${SAMPLE}_50kb.domain.RAW.no_cen.bedpe" \
  > "${SAMPLE}_50kb.domain.RAW.no_cen.NCHG.out"

echo "Calculate FDR and filter significant interactions"
python "${PPS}/NCHG_fdr_oddratio_calc.py" \
  "${SAMPLE}_50kb.domain.RAW.no_cen.NCHG.out" fdr_bh 2 0.01 \
  > "${SAMPLE}_50kb.domain.RAW.no_cen.NCHG.sig"

echo "Create GTrack using significant interactions"
bash "${PPS}/make_gtrack.sh" \
  "${SAMPLE}_50kb.domain.RAW.no_cen.NCHG.sig" \
  "${SAMPLE}_domainlist.domains" \
  "${SAMPLE}_intra_chromosome.gtrack"

# bedtools version needs to be 2.27.1 for this script to work
echo "Prepare inter-chromosomal Hi-C interaction counts"
bash "${PPS}/interchr_NCHG_input_auto.sh" \
  "${GENOME_SIZE}" \
  "${BLACKLIST}" \
  1mb > "${SAMPLE}_1mb_inter.bedpe"

echo "Call significant inter-chromosomal interactions"
"${PPS}/NCHG_hic/NCHG" \
  -i -p "${SAMPLE}_1mb_inter.bedpe" > "${SAMPLE}_1mb_inter_chr.NCHG.out"

echo "Calculate FDR and filter significant interactions"
python "${PPS}/NCHG_fdr_oddratio_calc.py" \
  "${SAMPLE}_1mb_inter_chr.NCHG.out" fdr_bh 2 0.01 \
  > "${SAMPLE}_1mb_inter_chr.NCHG.sig"

echo "Add significant inter-chromosomal interaction information to the GTrack"
bash "${PPS}/add_inter_chrom_beads_wo_lads.sh" \
  "${SAMPLE}_intra_chromosome.gtrack" \
  "${SAMPLE}_1mb_inter_chr.NCHG.sig" \
  "${SAMPLE}_inter_intra_chr.gtrack"

echo "Modify the Model Setup File to make a diploid model"
python "${PPS}/make_diploid_gtrack.py" \
  "${SAMPLE}_inter_intra_chr.gtrack" > "${SAMPLE}_inter_intra_chr.diploid.gtrack"

echo "GTRACK sanity check"
# Count non-comment GTrack rows per value of the first column.
grep -v '^#' "${SAMPLE}_inter_intra_chr.diploid.gtrack" | cut -f 1 | sort | uniq -c

echo "DONE"