-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprep-for-preprocessing.sh
More file actions
executable file
·36 lines (30 loc) · 1.58 KB
/
prep-for-preprocessing.sh
File metadata and controls
executable file
·36 lines (30 loc) · 1.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env bash
# Run all the pre-processing
# Charlotte Capitanchik last edit 30.03.20
#
# Creates the working directories, downloads the E-MTAB-8895 metadata and the
# FASTQ files listed in it, fetches the yeast genome and annotation from
# Ensembl, and creates a conda environment from psiclip_environment.yml.
# All paths are relative to the current working directory.

# Abort on the first failing command or unset variable, so that a
# "Step N completed" message is never printed after a failure.
set -eu

# Step 0. Make some directories we need
# -p creates the missing parent (pre_processing/) and succeeds when the
# directories already exist, so the script can be re-run safely.
mkdir -p data/fastq pre_processing/yeast-genome
# Step 1. Download the metadata from array express
# The SDRF table is saved as metadata.tsv; Step 2 reads the download URLs
# from it.
# NOTE(review): confirm this ArrayExpress URL still resolves.
metadata_url="https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-8895/E-MTAB-8895.sdrf.txt"
if ! wget "${metadata_url}" -O metadata.tsv; then
  # wget -O creates/truncates the output file before the transfer, so
  # remove the useless leftover rather than leave an empty metadata.tsv.
  rm -f metadata.tsv
  echo "Step 1 failed: could not download ${metadata_url}" >&2
  exit 1
fi
# A successful transfer of an empty body would still break Step 2.
if [ ! -s metadata.tsv ]; then
  echo "Step 1 failed: metadata.tsv is empty" >&2
  exit 1
fi
echo "Step 1 completed: Downloaded metadata from array express"
# Step 2. Download all the fastq into the fastq folder
# Note this is 93 files
# Field 34 of metadata.tsv is used as the download URL of each file;
# `tail -n +2` drops the header row.
if [ ! -s metadata.tsv ]; then
  echo "Step 2 failed: metadata.tsv is missing or empty (run Step 1 first)" >&2
  exit 1
fi
mkdir -p data/fastq
urls=$(cut -f34 metadata.tsv | tail -n +2)
n_failed=0
# Feed the URLs through a here-document (not a pipe) so the loop runs in
# the current shell and n_failed survives it.
while IFS= read -r url; do
  # Rows with nothing in that field have no file to fetch.
  [ -n "${url}" ] || continue
  name=${url##*/}
  echo "downloading ${name}"
  # Store files with the shorter .fq.gz suffix directly in data/fastq.
  # This replaces the `rename fastq.gz fq.gz` + `mv` pair, whose rename
  # syntax only works with the util-linux flavour of rename.
  case "${name}" in
    *fastq.gz) name="${name%fastq.gz}fq.gz" ;;
  esac
  dest="data/fastq/${name}"
  # Download to a temporary name first so that an interrupted or failed
  # transfer never leaves a truncated file that looks like a real fastq.
  if wget "${url}" -O "${dest}.part"; then
    mv -- "${dest}.part" "${dest}"
  else
    rm -f -- "${dest}.part"
    echo "failed to download ${url}" >&2
    n_failed=$((n_failed + 1))
  fi
done <<EOF
${urls}
EOF
# Count the files actually present instead of parsing `ls` output.
n_files=$(find data/fastq -type f -name '*.fq.gz' | wc -l)
if [ "${n_failed}" -gt 0 ]; then
  echo "Step 2 failed: ${n_failed} download(s) failed, ${n_files} fastqs present" >&2
  exit 1
fi
echo "Step 2 completed: Downloaded ${n_files} fastqs"
# Step 3. Download yeast genome and annotation
# Fetches the Ensembl release-99 genome FASTA and GTF annotation into
# pre_processing/yeast-genome and decompresses each one there.
genome_dir="pre_processing/yeast-genome"
mkdir -p "${genome_dir}"
for url in \
  "ftp://ftp.ensembl.org/pub/release-99/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz" \
  "ftp://ftp.ensembl.org/pub/release-99/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.99.gtf.gz"; do
  archive="${genome_dir}/${url##*/}"
  if ! wget "${url}" -O "${archive}"; then
    echo "Step 3 failed: could not download ${url}" >&2
    exit 1
  fi
  # -f overwrites a previously extracted copy, so re-running the script
  # does not stop on an "already exists" prompt/error.
  if ! gunzip -f "${archive}"; then
    echo "Step 3 failed: could not decompress ${archive}" >&2
    exit 1
  fi
done
echo "Step 3 completed: Downloaded yeast genome and annotation from Ensembl"
# Step 4. Set up a conda environment with the software you need
# The environment is defined by psiclip_environment.yml in the current
# directory.
if ! command -v conda >/dev/null 2>&1; then
  echo "Step 4 failed: conda was not found on PATH" >&2
  exit 1
fi
# Only report success when conda actually created the environment
# (it fails, for example, when the file is missing or the env exists).
if ! conda env create -f psiclip_environment.yml; then
  echo "Step 4 failed: could not create conda environment from psiclip_environment.yml" >&2
  exit 1
fi
echo "Step 4 completed: Created psiclip conda environment"