-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheval_model.sh
More file actions
180 lines (150 loc) · 4.9 KB
/
eval_model.sh
File metadata and controls
180 lines (150 loc) · 4.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#!/bin/bash
#
# Evaluate a trained model in three stages: basecall a read split with
# bonito, align the basecalls with minimap2, then summarise the alignment
# with src/tools/analyze_paf.py.
#
# Usage:
#   eval_model.sh EXP TRAIN_DIR [-u UBS] [-b BATCHSIZE] [-S SPLIT] [-w WEIGHTS]
#
# Positional arguments:
#   EXP        experiment name: POC | CPLX (lower case is accepted too)
#   TRAIN_DIR  model directory handed to `bonito basecaller`
#
# Options:
#   -b N   forward `--batch N` to bonito
#   -s S   restrict the evaluation to strand S
#   -S S   read split to evaluate (default: test)
#   -u U   X selects strand F, Y selects strand R; other values are ignored
#   -w W   forward `--weights W` to bonito and write the basecalls to
#          TRAIN_DIR/basecalls-weights_W instead of TRAIN_DIR/basecalls
#   -h     print the usage line and exit
set -e ## For exiting on first error encountered

# Print the one-line usage summary on stdout.
usage() {
  echo "Usage: $(basename "$0") EXP TRAIN_DIR [-u UBS] [-b BATCHSIZE] [-S SPLIT] [-w WEIGHTS]"
}

echo "> Starting model evaluation - $(date)"
printf "\n"

#### Script command line arguments ###########################################
if [[ $# -lt 2 ]]; then
  >&2 echo "[ERROR] Missing arguments"
  usage >&2
  exit 1
fi
EXP=${1} # POC | CPLX
TRAIN_DIR=${2}
# The two positionals come first, so getopts starts scanning at argument 3.
OPTIND=3

### Default arguments
BASECALLS_DIR="${TRAIN_DIR}/basecalls"
SPLIT='test'
# Start the optional settings empty so that values exported in the calling
# environment cannot leak into this run.
BATCHSIZE=''
STRAND=''
WEIGHTS=''

while getopts "h?b:s:u:w:S:" opt; do
  case "$opt" in
    h|\?)
      usage
      exit 1
      ;;
    # BATCHSIZE and WEIGHTS hold "--flag value" as ONE string; the basecalling
    # step expands them unquoted so they split into two arguments.
    b) BATCHSIZE="--batch ${OPTARG}" ;;
    s) STRAND=${OPTARG} ;;
    S) SPLIT=${OPTARG} ;;
    u)
      case "$OPTARG" in
        X) STRAND=F ;;
        Y) STRAND=R ;;
      esac
      ;;
    w)
      WEIGHTS="--weights ${OPTARG}"
      BASECALLS_DIR="${TRAIN_DIR}/basecalls-weights_${OPTARG}"
      ;;
  esac
done

echo "Arguments:"
echo "EXP=${EXP} | SPLIT=${SPLIT} | STRAND=${STRAND}"
echo "TRAIN_DIR=${TRAIN_DIR}"
echo "BATCHSIZE=${BATCHSIZE}"
echo "BASECALLS_DIR=${BASECALLS_DIR}"
echo "WEIGHTS=${WEIGHTS}"
#### Hard-coded Params ###########################################
REF_FILE=refdb_short.fasta

# Map the experiment name onto its reference set and normalise EXP to upper
# case, because EXP is used below to build directory and sample names.
case "$EXP" in
  POC|poc)
    REF_NAME='XNA20'
    EXP='POC'
    ;;
  CPLX|cplx)
    REF_NAME='XNA1024'
    EXP='CPLX'
    ;;
  *)
    # Fatal errors go to stderr, like the missing-arguments error above.
    >&2 echo "Unknown experiment: ${EXP}"
    exit 1
    ;;
esac

SAMPLE="${EXP}-${SPLIT}"
STRAND_LIST="split_reads-${SPLIT}.tsv"

# Distance threshold later passed to analyze_paf.py through its -d option.
case "$REF_NAME" in
  XNA16|XNA_4Ds|XNA20) MAX_BC_DIST=5 ;;
  XNA1024|XNA1024-A027) MAX_BC_DIST=8 ;;
esac

# EXP_DIR=$HOME'/projects/xna_basecallers/exps/'$EXP
# REFS_DIR=$HOME'/projects/xna_basecallers/xna_refs/'$REF_NAME
EXP_DIR="./xna_libs/${EXP}"
REFS_DIR=$EXP_DIR

if [[ -n $STRAND ]]; then
  # Strip only the .tsv suffix: cutting at the first dot would truncate the
  # list name whenever SPLIT itself contains a dot.
  STRAND_LIST="${STRAND_LIST%.tsv}-strands_${STRAND}.tsv"
fi
echo "REF_NAME=${REF_NAME} | SAMPLE=${SAMPLE} | STRAND_LIST=${STRAND_LIST}"
# printf "\n"
# echo "Hard-coded Params:"
# echo "REFS_DIR="$REFS_DIR
# echo "EXP_DIR="$EXP_DIR
# echo "MAX_BC_DIST="$MAX_BC_DIST

# Passing the literal word `help` as TRAIN_DIR stops here: the resolved
# parameters have been printed and no tool has been run yet.
if [[ $TRAIN_DIR == 'help' ]]; then
  exit
fi
##### 1) basecalling ###########################################
# Basecall the selected reads with bonito into
# $BASECALLS_DIR/reads-$SAMPLE.fastq. The step is skipped when a non-empty
# FASTQ already exists there.
printf "\n+++++ 1) basecalling ++++++++++++++++++++++++++++++++++++++++\n"
BASECALLS_FILE="reads-${SAMPLE}.fastq"
# echo "BASECALLS_FILE="$BASECALLS_FILE
fastq_path="${BASECALLS_DIR}/${BASECALLS_FILE}"

# -s is false for a missing file as well as for an empty one.
if [[ ! -s $fastq_path ]]; then
  echo "Basecalls filepath: ${fastq_path}"
  mkdir -p -- "$BASECALLS_DIR"
  source ./ub-bonito/venv3/bin/activate

  # Capture the status instead of letting `set -e` abort right away: a failed
  # run leaves a partial FASTQ behind, and a later run would otherwise take
  # that non-empty file for a finished one and skip basecalling.
  bonito_status=0
  # BATCHSIZE and WEIGHTS each hold "--flag value" (or nothing) and are left
  # unquoted on purpose so they split into separate arguments.
  # shellcheck disable=SC2086
  (set -x; # For displaying the command before executing it.
   bonito basecaller "$TRAIN_DIR" "${EXP_DIR}/reads" -v \
     --read-ids "${EXP_DIR}/${STRAND_LIST}" \
     $BATCHSIZE $WEIGHTS \
     > "$fastq_path") || bonito_status=$?
  deactivate

  if (( bonito_status != 0 )); then
    >&2 echo "[ERROR] bonito basecaller failed (exit status ${bonito_status})"
    rm -f -- "$fastq_path"
    exit "$bonito_status"
  fi

  if [[ ! -s $fastq_path ]]; then
    echo "[ERROR] basecalls file is empty!"
    rm -f -- "$fastq_path"
  fi
else
  echo -e "Fastq file found: ${BASECALLS_FILE}"
  echo "Skipping..."
fi
##### 2) minimap2 ###########################################
# Align the basecalls against $REFS_DIR/$REF_FILE with minimap2, writing
# $BASECALLS_DIR/alignment-$SAMPLE.paf. The step is skipped when the
# basecalls are missing or an alignment (.paf or .paf.gz) already exists.
printf "\n+++++ 2) minimap2 ++++++++++++++++++++++++++++++++++++++++\n"
ALIGN_FILE_PREFIX="alignment-${SAMPLE}"
ALIGN_FILE="${ALIGN_FILE_PREFIX}.paf"
reads_path="${BASECALLS_DIR}/${BASECALLS_FILE}"
paf_out="${BASECALLS_DIR}/${ALIGN_FILE}"

# if true; then
if [[ -e $reads_path && ! -e "${paf_out}.gz" && ! -e $paf_out ]]; then
  # Capture the status instead of letting `set -e` abort right away: a failed
  # run can leave a partial .paf behind, which a later run would accept as an
  # existing alignment and analyse.
  minimap_status=0
  (set -x;
   bin/minimap2 \
     -x map-ont -t 12 -w 5 -c --cs=short --secondary=no \
     "${REFS_DIR}/${REF_FILE}" \
     "$reads_path" \
     -o "$paf_out") || minimap_status=$?

  if (( minimap_status != 0 )); then
    >&2 echo "[ERROR] minimap2 failed (exit status ${minimap_status})"
    rm -f -- "$paf_out"
    exit "$minimap_status"
  fi

  if [[ -s $paf_out ]]; then
    # gzip alignment.paf;
    echo "Alignment finished."
  else
    # Say why the output disappears instead of deleting it silently.
    echo "[WARNING] Alignment file is empty! No reads could be aligned."
    rm -f -- "$paf_out"
  fi
else
  if [[ ! -e $reads_path ]]; then
    echo "[ERROR] Basecalls NOT found: ${reads_path}"
  else
    # The basecalls exist, so the step was skipped because a .paf or .paf.gz
    # is already present.
    echo "Alignment file found: ${ALIGN_FILE}"
  fi
  echo "Skipping..."
fi
##### 3) analyze_paf.py ###########################################
# Summarise the alignment with src/tools/analyze_paf.py. The step runs only
# when a non-empty .paf exists and $BASECALLS_DIR/results_summ-$SAMPLE.csv
# does not (presumably the file analyze_paf.py writes -- confirm).
printf "\n+++++ 3) analyze_paf.py ++++++++++++++++++++++++++++++++++++++++\n"
paf_path="${BASECALLS_DIR}/${ALIGN_FILE}"

# if false; then
if [[ -s $paf_path ]]; then
  RES_FILE="results_summ-${SAMPLE}.csv"
  if [[ ! -e "${BASECALLS_DIR}/${RES_FILE}" ]]; then
    # Build the optional arguments as an array so that nothing inherited from
    # the environment is passed along and the strand value stays one word.
    EXTRA_ARGS=()
    if [[ -n $STRAND ]]; then
      EXTRA_ARGS=(-S "$STRAND")
    fi
    (set -x;
     python src/tools/analyze_paf.py "$EXP" "$paf_path" \
       -p -D -d "$MAX_BC_DIST" "${EXTRA_ARGS[@]}" \
       -R "${BASECALLS_DIR}/${BASECALLS_FILE}")
  else
    echo -e "Results file found: ${RES_FILE}"
    echo -e "Skipping..."
  fi
else
  # This branch is reached when the .paf is missing OR empty, so the two
  # cases are told apart with -e. Testing -s here can never succeed and would
  # report every missing file as "empty".
  if [[ ! -e $paf_path ]]; then
    echo "[ERROR] Alignment file NOT found: ${paf_path}"
    # Step 2 deletes an alignment that came out empty, so a missing file can
    # also mean that no reads aligned.
    echo "        (an empty alignment is removed in step 2, so possibly no reads could be aligned)"
  else
    echo "[WARNING] Alignment file is empty! No reads could be aligned."
    echo "> ${paf_path}"
  fi
  echo "Skipping..."
fi
printf "\n"
echo "> Model evaluation finished - $(date)"