diff --git a/README.md b/README.md index af95fd4..bd39180 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ The repository at https://github.com/pachterlab/sleuth_paper_analysis should alw # Preliminaries - Install [snakemake](https://bitbucket.org/johanneskoester/snakemake) +- Install [sratoolkit](https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?cmd=show&f=software&m=software&s=software) - Download and install `R` along with dependencies listed below (R dependencies section) - Updated the `BASE` variable in `config.py` to represent the base path on your system @@ -36,6 +37,8 @@ Install using `install.packages()` - `jsonlite` - `reshape2` - `scales` +- `VennDiagram` +- `knitr` ### from Bioconductor diff --git a/annotation/Snakefile b/annotation/Snakefile index 8b185cd..9fb5b1c 100644 --- a/annotation/Snakefile +++ b/annotation/Snakefile @@ -36,7 +36,7 @@ rule get_genome: output: GENOME_FA shell: - 'curl -o {output}' + 'curl -o {output}.gz' ' --silent' ' ftp://ftp.ensembl.org/pub/release-80/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz' ' && ' diff --git a/bottomly/R/fdr.Rmd b/bottomly/R/fdr.Rmd index 019c359..2ef2d8c 100644 --- a/bottomly/R/fdr.Rmd +++ b/bottomly/R/fdr.Rmd @@ -86,6 +86,7 @@ p ``` ```{r} +dir.create(base_dir, showWarnings = FALSE, recursive=T) filename <- file.path(base_dir, paste0('isoform_resampling', default_extension)) save_plot(filename, p, base_aspect_ratio = 0.75, base_height = 15) ``` diff --git a/bottomly/R/var_plots.Rmd b/bottomly/R/var_plots.Rmd index c9a89cb..0c15a7d 100644 --- a/bottomly/R/var_plots.Rmd +++ b/bottomly/R/var_plots.Rmd @@ -372,6 +372,7 @@ p ``` ```{r} +dir.create(base_dir, showWarnings = FALSE, recursive=T) filename <- file.path(base_dir, paste0('inferential_variance', default_extension)) save_plot(filename, p, base_aspect_ratio = 1.6, base_height = 15) diff --git a/bottomly/Snakefile b/bottomly/Snakefile index 4ff89b3..d981b7f 100644 --- a/bottomly/Snakefile +++ b/bottomly/Snakefile @@ -190,6 +190,8 @@ rule run_benchmarks_isoform: input: expand('results/single/{id}/kallisto/abundance.h5', id = SRA_SINGLE), expand('results/validation/{num}/cuffdiff/isoform_exp.diff', + num = range(1, 20 + 1)), + expand('results/training/{num}/cuffdiff/gene_exp.diff', num = range(1, 20 + 1)) output: 'results/isoform_self_benchmark.rds', @@ -234,6 +236,7 @@ def get_cuffdiff_training(wildcards): b = TRAINING_B[num].split(' ') ids = a + b return expand('results/single/{id}/cuffquant/abundances.cxb', id = ids) + rule cuffdiff_training: input: get_cuffdiff_training @@ -269,6 +272,7 @@ def get_cuffdiff_validation(wildcards): b = VALIDATION_B[num].split(' ') ids = a + b return expand('results/single/{id}/cuffquant/abundances.cxb', id = ids) + rule cuffdiff_validation: input: get_cuffdiff_validation diff --git a/cuffdiff2_analysis/R/investigating_variance.Rmd b/cuffdiff2_analysis/R/investigating_variance.Rmd index 791bee2..45b7fad 100644 --- a/cuffdiff2_analysis/R/investigating_variance.Rmd +++ b/cuffdiff2_analysis/R/investigating_variance.Rmd @@ -20,8 +20,9 @@ source('../../geuvadis/R/benchmark_methods.R') source('../../simulation_core/R/simulate_de.R') library('sleuth') +info_save = info -info <- dplyr::mutate(info, +info <- dplyr::mutate(info_save, condition = ifelse(condition == 'scramble', 'A', 'B')) sir <- run_sleuth(info, max_bootstrap = 100) @@ -121,7 +122,7 @@ ggplot() ## comparing results to limma and DESeq2 ```{r} -info <- dplyr::mutate(info, +info <- dplyr::mutate(info_save, condition = ifelse(condition == 'scramble', 'A', 'B')) sgr <- run_sleuth(info, max_bootstrap = 30, gene_column = 'ens_gene', @@ -156,6 +157,7 @@ fp_plots <- lapply(fp_in_order[1:9], ```{r} +library(cowplot) plot_grid(plotlist = fp_plots) ``` diff --git a/cuffdiff2_analysis/Snakefile b/cuffdiff2_analysis/Snakefile index a4fa0cd..ce90bb4 100644 --- a/cuffdiff2_analysis/Snakefile +++ b/cuffdiff2_analysis/Snakefile @@ -29,15 +29,13 @@ rule all: rule metadata: output: - META, - "metadata/hiseq_accession.txt" + META shell: source_r('R', 'get_sample_info.R') rule fastq_dump_paired: input: - META, - "metadata/hiseq_accession.txt" + META output: 'data/paired/{id}', 'data/paired/{id}/{id}_1.fastq.gz', diff --git a/software/Snakefile b/software/Snakefile index e883014..b365c0a 100644 --- a/software/Snakefile +++ b/software/Snakefile @@ -141,9 +141,9 @@ rule get_seqtk: branch = '4feb6e81444ab6bc44139dd3a125068f81ae4ad8' shell: 'git clone {params.url} {params.tmp};' + ' cd {params.tmp};' ' git reset --hard {params.branch};' # 'git clone --branch {params.branch} --depth 1 {params.url} {params.tmp};' - ' cd {params.tmp};' ' make;' ' cd ..;' # 'find {params.tmp} -maxdepth 1 -perm +o+x -type f -exec cp {{}} bin \;' @@ -160,8 +160,8 @@ rule get_BitSeq: branch = '72fe9d9408467ac55fcbc717472e45ec71626b45' shell: 'git clone {params.url} {params.tmp};' - ' git reset --hard {params.branch};' ' cd {params.tmp};' + ' git reset --hard {params.branch};' ' make;' ' cd ..;' # 'find {params.tmp} -maxdepth 1 -perm +o+x -type f -exec cp {{}} bin \;'