diff --git a/.github/workflows/deploy_apptainer.yml b/.github/workflows/deploy_apptainer.yml index e7b8081..1034cc3 100644 --- a/.github/workflows/deploy_apptainer.yml +++ b/.github/workflows/deploy_apptainer.yml @@ -20,12 +20,6 @@ jobs: with: fetch-depth: 0 - - name: add apptainer source - shell: bash - run: | - sudo add-apt-repository -y ppa:apptainer/ppa - sudo apt-get update - - name: create dockerfile uses: snakemake/snakemake-github-action@v2 with: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6a893ca..a75b9aa 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,20 +1,19 @@ -name: CI +name: Tests on: push: - branches: [main, dev] + branches: [main] pull_request: - branches: [main, dev] + branches: [main] jobs: Formatting: runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Test formatting + - name: Formatting uses: super-linter/super-linter@v7 env: VALIDATE_ALL_CODEBASE: false @@ -25,11 +24,10 @@ jobs: Linting: runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} steps: - uses: actions/checkout@v4 - - name: Test linting workflow - uses: snakemake/snakemake-github-action@v2.0.0 + - name: Lint workflow + uses: snakemake/snakemake-github-action@v2 with: directory: . snakefile: workflow/Snakefile @@ -37,21 +35,22 @@ jobs: Testing: runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} needs: + - Linting - Formatting steps: - uses: actions/checkout@v4 - - name: Test run workflow - uses: snakemake/snakemake-github-action@v2.0.0 + + - name: Test workflow + uses: snakemake/snakemake-github-action@v2 with: directory: .test snakefile: workflow/Snakefile - args: "--sdm conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache -n" + args: "--sdm conda --show-failed-logs --cores 2 --conda-cleanup-pkgs cache -n" - name: Test report - uses: snakemake/snakemake-github-action@v2.0.0 + uses: snakemake/snakemake-github-action@v2 with: directory: .test snakefile: workflow/Snakefile - args: "--cores 3 --report report.zip -n" + args: "--cores 2 --report report.zip" diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 78dcfea..e9ec84c 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -17,4 +17,4 @@ jobs: - uses: googleapis/release-please-action@v4 with: token: ${{ secrets.GITHUB_TOKEN }} - release-type: simple + release-type: go # just keep a changelog, no version anywhere outside of git tags diff --git a/README.md b/README.md index 0dc2967..f5f5a69 100644 --- a/README.md +++ b/README.md @@ -67,8 +67,6 @@ conda activate snakemake-ont-basecalling This step creates all conda environments specified in the snakemake rules. This step is optional. ```bash -# activate new environment -conda activate snakemake-ont-basecalling snakemake -c 1 --sdm conda --conda-create-envs-only --conda-cleanup-pkgs cache --directory .test ``` @@ -118,7 +116,7 @@ snakemake --cores 3 --sdm conda apptainer --directory .test --apptainer-args "-- To run the workflow with test files on a **slurm cluster**, adjust the slurm-specific profile `workflow/profiles/slurm/config.yaml` file and run: ```bash -snakemake --sdm conda --workflow-profile workflow/profiles/slurm/ --directory .test +snakemake --cores 3 --sdm conda --workflow-profile workflow/profiles/slurm/ --directory .test ``` **Note:** diff --git a/config/README.md b/config/README.md index 119dc02..8985e5d 100644 --- a/config/README.md +++ b/config/README.md @@ -44,7 +44,7 @@ snakemake --cores 3 --sdm conda apptainer --directory .test --apptainer-args "-- To run the workflow with test files on a **slurm cluster**, adjust the slurm-specific profile `workflow/profiles/slurm/config.yaml` file and run: ```bash -snakemake --sdm conda --workflow-profile workflow/profiles/slurm/ --directory .test +snakemake --cores 3 --sdm conda --workflow-profile workflow/profiles/slurm/ --directory .test ``` **Note:** diff --git a/resources/images/dag.png b/resources/images/dag.png index 197fb03..2696435 100644 Binary files a/resources/images/dag.png and b/resources/images/dag.png differ diff --git a/resources/images/dag.svg b/resources/images/dag.svg index 99d6fd2..7149dea 100644 --- a/resources/images/dag.svg +++ b/resources/images/dag.svg @@ -1,581 +1,163 @@ - - - - - - - - - - - - snakemake_dag - - - 4->11 - - - - - - 0 - - all - - - - 1 - - aggregrate_barcode - - - - 1->0 - - - - - - - - - - 4 - - dorado_demux - - - - - - - 11 - - aggregrate_file - - - - 4->11 - - - - - 4->11 - - - - - 4->11 - - - - - 4->11 - - - - - - 13 - - - - - - - 7 - - dorado_summary - - - - - 6 - - download_model - - - - - - - - 15 - - prepare_summary - - - - - - - - 10 - - gzip - - - - 10->1 - - - - - - 11->10 - - - - - - - - - 14 - - pycoQC_report - - - - 14->0 - - - - - - 15->14 - - - - - - 18 - - nanoplot_report - - - - 15->18 - - - - - - - - - 18->0 - - - - - 5 - - dorado_simplex - - + + +snakemake_dag + + + +0 + +all + + + +1 + +aggregrate_barcode + + + +1->0 + + + + + +2 + +gzip + + + +2->1 + + + + + +3 + +aggregrate_file + + + +3->2 + + + + + +4 + +download_model + + + +7 + +dorado_simplex + + + +4->7 + + + + + +5 + +collect_demuxed_fastq + + + +5->3 + + + + + +6 + +dorado_demux + + + +6->5 + + + + + +7->6 + + + + + +10 + +dorado_summary + + + +7->10 + + + + + +8 + +pycoQC_report + + + +8->0 + + + + + +9 + +prepare_summary + + + +9->8 + + + + + +11 + +nanoplot_report + + + +9->11 + + + + + +10->9 + + + + + +11->0 + + + + diff --git a/workflow/Snakefile b/workflow/Snakefile index 8f9b07c..2e04d27 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -43,7 +43,7 @@ configfile: "config/config.yml" # ----------------------------------------------------- # container definition (optional) # ----------------------------------------------------- -container: "oras://ghcr.io/mpusp/snakemake-ont-basecalling:latest" +containerized: "oras://ghcr.io/mpusp/snakemake-ont-basecalling:latest" # ----------------------------------------------------- @@ -77,9 +77,6 @@ onerror: localrules: download_model, - dorado_demux, - aggregrate_file, - aggregrate_barcode, # ----------------------------------------------------- diff --git a/workflow/profiles/slurm/config.yaml b/workflow/profiles/slurm/config.yaml index 89764cb..6cf8310 100644 --- a/workflow/profiles/slurm/config.yaml +++ b/workflow/profiles/slurm/config.yaml @@ -14,7 +14,7 @@ default-resources: set-resources: download_model: mem_mb: 48000 - cpus_per_task: 4 + cpus_per_task: 1 dorado_simplex: slurm_partition: "scc-gpu" gpu: 1 @@ -25,13 +25,13 @@ set-resources: cpus_per_task: 40 dorado_demux: mem_mb: 48000 - cpus_per_task: 40 + cpus_per_task: 4 gzip: mem_mb: 48000 cpus_per_task: 40 pycoQC_report: mem_mb: 48000 - cpus_per_task: 12 + cpus_per_task: 1 runtime: "60min" nanoplot_report: mem_mb: 48000 @@ -41,8 +41,8 @@ set-resources: # threads define how much resources each rule may take, overrides .smk defs: set-threads: dorado_simplex: 1 - dorado_demux: 40 + dorado_demux: 4 dorado_summary: 40 gzip: 40 - pycoQC_report: 12 + pycoQC_report: 1 nanoplot_report: 12 diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index be0fde7..bb48b14 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -48,6 +48,14 @@ def check_dorado_version(dorado_path, min_dorado_version): ) +def get_run_files(run): + file_ext = config["input"]["file_extension"] + run_dir = runs.loc[run, "data_folder"] + pattern = f"{run_dir}/{{file}}{file_ext}" + files = glob_wildcards(pattern).file + return files + + # ----------------------------------------------------- # input functions # ----------------------------------------------------- @@ -58,26 +66,34 @@ def get_pod5(wildcards): ) +def get_demuxed_flag(wildcards): + return expand( + "results/{run}/dorado_demux/{file}/demux.finished", + run=wildcards.run, + file=get_run_files(wildcards.run), + ) + + def get_demuxed_fastq(wildcards): # parse file names file_ext = config["input"]["file_extension"] data_dir = runs.loc[wildcards.run, "data_folder"] pattern = f"{data_dir}/{{file}}{file_ext}" files = glob_wildcards(pattern).file - # parse prefix - cp_out = [ - checkpoints.dorado_demux.get( - run=wildcards.run, barcode=wildcards.barcode, file=curr_file - ).output[0] - for curr_file in files - ] + # construct base output paths + cp_out = expand( + "results/{run}/dorado_demux/{file}", + run=wildcards.run, + file=files, + ) base_dir = os.path.commonpath(cp_out) + # glob pattern for demuxed fastq files globs = glob_wildcards( os.path.join( base_dir, "{file}/{prefix}_barcode{barcode}_{suffix}_00000000_0.fastq" ) ) - # construct input targets + # construct all input targets result = expand( "results/{run}/dorado_demux/{file}/{prefix}_barcode{barcode}_{suffix}_00000000_0.fastq", run=wildcards.run, diff --git a/workflow/rules/demultiplex.smk b/workflow/rules/demultiplex.smk index 0b9afd1..6221894 100644 --- a/workflow/rules/demultiplex.smk +++ b/workflow/rules/demultiplex.smk @@ -1,20 +1,20 @@ # ----------------------------------------------------- # demultiplex dorado basecall files # ----------------------------------------------------- -checkpoint dorado_demux: +rule dorado_demux: input: bam="results/{run}/dorado_simplex/{file}.bam", - summary="results/{run}/dorado_summary/{file}.summary", output: fastq=directory("results/{run}/dorado_demux/{file}"), + flag="results/{run}/dorado_demux/{file}/demux.finished", params: dorado=config["dorado"]["path"], cuda=config["dorado"]["simplex"]["cuda"], conda: "../envs/base.yml" - threads: int(workflow.cores * 0.2) wildcard_constraints: file=config["input"]["file_regex"], + threads: 4 log: "results/{run}/dorado_demux/{file}.log", shell: @@ -27,7 +27,29 @@ checkpoint dorado_demux: --no-classify \ {input.bam} 2> {log}; find {output.fastq} -mindepth 4 -type f -name '*.fastq' -exec mv {{}} {output.fastq}/ \\; - find {output.fastq} -mindepth 1 -type d -empty -delete + find {output.fastq} -mindepth 1 -type d -empty -delete; + touch {output.flag} + """ + + +# ----------------------------------------------------- +# collect demuxed fastq files (pseudo rule) +# ----------------------------------------------------- +checkpoint collect_demuxed_fastq: + input: + get_demuxed_flag, + output: + "results/{run}/dorado_demux/demux_finished.txt", + conda: + "../envs/base.yml" + threads: 1 + log: + "results/{run}/dorado_demux/demux_finished.log", + shell: + """ + printf '%s\n' $(echo {input}) > {output} 2> {log}; + echo 'Collected FASTQ files:' >> {log}; + echo $(wc -l {output}) >> {log}; """ @@ -36,6 +58,7 @@ checkpoint dorado_demux: # ----------------------------------------------------- rule aggregrate_file: input: + flag=rules.collect_demuxed_fastq.output, fastq=get_demuxed_fastq, output: fastq="results/{run}/dorado_aggregate/{barcode}.fastq",