From 0ec5df588b0646056519825aad4bd09563266cd9 Mon Sep 17 00:00:00 2001 From: "Kevin Bonham, PhD" Date: Wed, 13 Nov 2024 11:32:56 -0500 Subject: [PATCH 1/2] mp v4 working --- aws-params.yaml | 4 ++-- metaphlan_only.nf | 18 ++++++++++++++++++ processes/metaphlan.nf | 11 +++-------- 3 files changed, 23 insertions(+), 10 deletions(-) create mode 100644 metaphlan_only.nf diff --git a/aws-params.yaml b/aws-params.yaml index ba0ae3e..377f1ad 100644 --- a/aws-params.yaml +++ b/aws-params.yaml @@ -1,8 +1,8 @@ readsdir: "s3://vkc-nextflow/rawfastq/" outdir: "s3://vkc-nextflow/output/" human_genome: "s3://biobakery-databases/kneaddata_databases/" -metaphlan_db: "s3://biobakery-databases/metaphlan_databases/" +metaphlan_db: "s3://biobakery-databases/metaphlan_v4_databases/" humann_bowtie_db: "s3://biobakery-databases/humann_databases/chocophlan" humann_protein_db: "s3://biobakery-databases/humann_databases/uniref" humann_utility_db: "s3://biobakery-databases/humann_databases/utility_mapping" -filepattern: "*_L00{1,2,3,4}_R{1,2}_001.fastq.gz" \ No newline at end of file +filepattern: "*_L00{1,2,3,4}_R{1,2}_001.fastq.gz" diff --git a/metaphlan_only.nf b/metaphlan_only.nf new file mode 100644 index 0000000..e4f02fc --- /dev/null +++ b/metaphlan_only.nf @@ -0,0 +1,18 @@ +#!/usr/bin/env nextflow +nextflow.enable.dsl=2 + +include { metaphlan; metaphlan_bzip } from './processes/metaphlan.nf' + +workflow { + + read_pairs_ch = Channel + .fromFilePairs( + [ "$params.readsdir/$params.filepattern", + "$params.readsdir/*_kneaddata.fastq.gz" ], + size:-1) + + metaphlan_db = params.metaphlan_db + + metaphlan_out = metaphlan(read_pairs_ch, metaphlan_db) + metaphlan_bzip = metaphlan_bzip(metaphlan_out[0], metaphlan_out[4]) +} diff --git a/processes/metaphlan.nf b/processes/metaphlan.nf index e081e44..ad35952 100644 --- a/processes/metaphlan.nf +++ b/processes/metaphlan.nf @@ -4,7 +4,6 @@ process metaphlan { input: tuple val(sample), path(kneads) - path unmatched path metaphlan_db output: @@ -15,13 +14,9 @@ process metaphlan { path "${sample}.sam" script: - def forward = kneads[0] - def reverse = kneads[1] - def unf = unmatched[0] - def unr = unmatched[1] - + """ - cat $forward $reverse $unf $unr > ${sample}_grouped.fastq.gz + cat $kneads > ${sample}_grouped.fastq.gz metaphlan ${sample}_grouped.fastq.gz ${sample}_profile.tsv \ --bowtie2out ${sample}_bowtie2.tsv \ @@ -49,4 +44,4 @@ process metaphlan { """ bzip2 -v $sam """ -} \ No newline at end of file +} From c277c986897853faf5139fa47ab304b0eb8ea22b Mon Sep 17 00:00:00 2001 From: "Kevin Bonham, PhD" Date: Thu, 2 Jan 2025 16:12:41 -0500 Subject: [PATCH 2/2] modify main and metaphlan --- main.nf | 6 ------ processes/metaphlan.nf | 5 +++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index e99896d..aa11be5 100755 --- a/main.nf +++ b/main.nf @@ -14,14 +14,8 @@ workflow { human_genome = params.human_genome metaphlan_db = params.metaphlan_db - humann_bowtie_db = params.humann_bowtie_db - humann_protein_db = params.humann_protein_db - humann_utility_db = params.humann_utility_db knead_out = kneaddata(read_pairs_ch, human_genome) metaphlan_out = metaphlan(knead_out[0], knead_out[1], metaphlan_db) metaphlan_bzip = metaphlan_bzip(metaphlan_out[0], metaphlan_out[4]) - humann_out = humann(metaphlan_out[0], metaphlan_out[1], metaphlan_out[2], humann_bowtie_db, humann_protein_db) - regroup_out = humann_regroup(humann_out[0], humann_out[1], humann_utility_db) - humann_rename(regroup_out, humann_utility_db) } diff --git a/processes/metaphlan.nf b/processes/metaphlan.nf index ad35952..ecc1a40 100644 --- a/processes/metaphlan.nf +++ b/processes/metaphlan.nf @@ -3,8 +3,9 @@ process metaphlan { publishDir "$params.outdir/metaphlan", pattern: "{*.tsv}" input: - tuple val(sample), path(kneads) - path metaphlan_db + tuple val(sample), path(kneads) + path unmatched + path metaphlan_db output: val sample , emit: sample