-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaf2_fasta_format.sh
More file actions
executable file
·27 lines (22 loc) · 1.48 KB
/
af2_fasta_format.sh
File metadata and controls
executable file
·27 lines (22 loc) · 1.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env bash
# TODO - make output_dir option
# input and output
# output_dir=$2
# create new output dir if none exists
if [ ! -d formatted_fastas ]; then
mkdir formatted_fastas
fi
# loop over all fasta files and reformat them
for file in "$@"; do
awk 'NR==1 {printf ">" FILENAME "\n"} 1' ${file} | # add file name as fasta header
sed '1s/\.fasta//g' | # remove .fasta from fasta name
awk '/^>/ {printf("\n%s\n",$0);next;} { printf("%s",$0);} END {printf("\n");}' | # concatenate all amino acid seqs to a single line, (also creates white new lines above and below new fasta header!)
sed '/^$/d' | # delete empty lines
sed '2,$s/^>.*$//g' | # remove all but the first fastas header
sed '/^$/d' | # delete empty lines
sed 's/*//g' | # remove any * from seqs
sed 's/^\w.*$/&:/g' | # add : to end of all seqs
sed '$s/://g' >"${file%.*}_AF2.fasta" # remove : from last seq and output file
done
# move all reformatted fasta files to output_dir
mv ./*_AF2.fasta formatted_fastas