diff --git a/conf/test_full.config b/conf/test_full.config index ea90cc16..35f705b0 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,15 +10,68 @@ ---------------------------------------------------------------------------------------- */ +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] + + withName: FASTQC { + cpus = { 1 } + memory = { 15.GB * task.attempt } + } + withName: ADAPTER_REMOVAL { + cpus = { 8 } + memory = { 15.GB * task.attempt } + time = { 2.h * task.attempt } + } + withName: PICARD_CREATESEQUENCEDICTIONARY { + cpus = { 12 } + memory = { 15.GB * task.attempt } + time = { 8.h * task.attempt } + } + withName: PICARD_MARKDUPLICATES { + memory = { 15.GB } + } + withName: BWA_ALN { + cpus = { 8 } + memory = { 15.GB * task.attempt } + time = { 8.h * task.attempt } + } + withName: DEDUP { + cpus = { 8 } + memory = { 15.GB * task.attempt } + time = { 4.h * task.attempt } + } + withName: GENOTYPING_HC { + cpus = { 8 } + memory = { 15.GB * task.attempt } + time = { 8.h * task.attempt } + } +} + params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/TCLamnidis/test-datasets/' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'eager/testdata/Benchmarking/eager3_benchmarking_vikingfish.tsv' // Genome references - genome = 'R64-1-1' + // Reference FASTA: Gadus morhua (gadMor3.0) RefSeq RNA sequences + fasta = 'https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_other/Gadus_morhua/reference/GCF_902167405.1_gadMor3.0/GCF_902167405.1_gadMor3.0_rna.fna.gz' + + bwaalnn = 0.04 + bwaalnl = 1024 + + run_bam_filtering = true + bam_unmapped_type = 'discard' + bam_mapping_quality_threshold = 25 + + run_genotyping = true + genotyping_tool = 'hc' + genotyping_source = 'raw' + gatk_ploidy = 2 } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 609f44fd..024e0fa2 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2026-01-16T10:21:50+00:00", - "description": "

\n \n \n \"nf-core/eager\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/eager)\n[![GitHub Actions CI Status](https://github.com/nf-core/eager/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/eager/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/eager/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/eager/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/eager/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1465061-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1465061)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/eager)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23eager-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/eager)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?label=HiRSE&labelColor=005aa0&link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/eager** is a scalable and reproducible bioinformatics best-practise processing pipeline for genomic NGS sequencing data, with a focus on ancient DNA (aDNA) data. It is ideal for the (palaeo)genomic analysis of humans, animals, plants, microbes and even microbiomes.\n\n## Pipeline summary\n\n\n\n\n- (Optionally) create reference genome indices for mapping (`bwa`, `samtools`, and `picard`)\n- Sequencing quality control (`FastQC`, `Falco`)\n- Sequencing adapter removal, paired-end data merging (`AdapterRemoval`)\n- Read mapping to reference using (`bwa aln`, `bwa mem`, `CircularMapper`, `bowtie2`, or `mapAD`)\n- Post-mapping processing, statistics and conversion to bam (`samtools`, and `preseq`)\n- Ancient DNA C-to-T damage pattern visualisation (`DamageProfiler`)\n- PCR duplicate removal (`DeDup` or `MarkDuplicates`)\n- Post-mapping statistics and BAM quality control (`Qualimap`)\n- Library Complexity Estimation (`preseq`)\n- Overall pipeline statistics summaries (`MultiQC`)\n\n### Additional Steps\n\nAdditional functionality contained by the pipeline currently includes:\n\n#### Input\n\n- Automatic merging of complex sequencing setups (e.g. 
multiple lanes, sequencing configurations, library types)\n\n#### Preprocessing\n\n- Illumina two-coloured sequencer poly-G tail removal (`fastp`)\n- Post-AdapterRemoval trimming of FASTQ files prior mapping (`fastp`)\n- Automatic conversion of unmapped reads to FASTQ (`samtools`)\n- Host DNA (mapped reads) stripping from input FASTQ files (for sensitive samples)\n\n#### aDNA Damage manipulation\n\n- Damage removal/clipping for UDG+/UDG-half treatment protocols (`BamUtil`)\n- Damaged reads extraction and assessment (`PMDTools`)\n- Nuclear DNA contamination estimation of human samples (`angsd`)\n\n#### Genotyping\n\n- Creation of VCF genotyping files (`GATK UnifiedGenotyper`, `GATK HaplotypeCaller` and `FreeBayes`)\n- Creation of EIGENSTRAT genotyping files (`pileupCaller`)\n- Creation of Genotype Likelihood files (`angsd`)\n- Consensus sequence FASTA creation (`VCF2Genome`)\n- SNP Table generation (`MultiVCFAnalyzer`)\n\n#### Biological Information\n\n- Mitochondrial to Nuclear read ratio calculation (`MtNucRatioCalculator`)\n- Statistical sex determination of human individuals (`Sex.DetERRmine`)\n\n#### Metagenomic Screening\n\n- Low-sequenced complexity filtering (`BBduk` or `PRINSEQ++`)\n- Taxonomic binner with alignment (`MALT` or `MetaPhlAn 4`)\n- Taxonomic binner without alignment (`Kraken2`,`KrakenUniq`)\n- aDNA characteristic screening of taxonomically binned data from MALT (`MaltExtract`)\n\n#### Functionality Overview\n\nA graphical overview of suggested routes through the pipeline depending on context can be seen below.\n\n

\n \"nf-core/eager\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.tsv`:\n\n```csv\nample_id\tlibrary_id\tlane\tcolour_chemistry\tpairment\tstrandedness\tdamage_treatment\tr1\tr2\tbam\tbam_reference_id\nsample1\tsample1_a\t1\t4\tpaired\tdouble\tnone\t///sample1_a_l1_r1.fq.gz ///sample1_a_l1_r2.fq.gz\tNA\tNA\nsample2\tsample2_a\t2\t2\tsingle\tdouble\tfull\t///sample2_a_l1_r1.fq.gz\tNA\tNA\tNA\nsample3\tsample3_a\t8\t4\tsingle\tdouble\thalf\tNA\tNA\t///sample31_a.bam\tMammoth_MT_Krause\n```\n\nEach row represents a fastq file (single-end), pair of fastq files (paired end), and/or a bam file.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/eager \\\n -profile \\\n --input samplesheet.csv \\\n --fasta '.fasta' \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/eager/usage) and the [parameter documentation](https://nf-co.re/eager/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/eager/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/eager/output).\n\n## Credits\n\nThis pipeline was established by Alexander Peltzer ([apeltzer](https://github.com/apeltzer)) and [James A. Fellows Yates](https://github.com/jfy133). Version two had major contributions from [Stephen Clayton](https://github.com/sc13-bioinf), [Thiseas C. 
Lamnidis](https://github.com/TCLamnidis), [Maxime Borry](https://github.com/maxibor), [Zandra Fagern\u00e4s](https://github.com/ZandraFagernas), [Aida Andrades Valtue\u00f1a](https://github.com/aidaanva) and [Maxime Garcia](https://github.com/MaxUlysse) and the nf-core community.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Alex H\u00fcbner](https://github.com/alexhbnr)\n- [Alexandre Gilardet](https://github.com/alexandregilardet)\n- Arielle Munters\n- [\u00c5shild V\u00e5gene](https://github.com/ashildv)\n- [Charles Plessy](https://github.com/charles-plessy)\n- [Elina Salmela](https://github.com/esalmela)\n- [Fabian Lehmann](https://github.com/Lehmann-Fabian)\n- [He Yu](https://github.com/paulayu)\n- [Hester van Schalkwyk](https://github.com/hesterjvs)\n- [Ian Light-M\u00e1ka](https://github.com/ilight1542)\n- [Ido Bar](https://github.com/IdoBar)\n- [Irina Velsko](https://github.com/ivelsko)\n- [I\u015f\u0131n Alt\u0131nkaya](https://github.com/isinaltinkaya)\n- [Johan Nylander](https://github.com/nylander)\n- [Jonas Niemann](https://github.com/NiemannJ)\n- [Katerine Eaton](https://github.com/ktmeaton)\n- [Kathrin N\u00e4gele](https://github.com/KathrinNaegele)\n- [Kevin Lord](https://github.com/lordkev)\n- [Luc Venturini](https://github.com/lucventurini)\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Marcel Keller](https://github.com/marcel-keller)\n- [Megan Michel](https://github.com/meganemichel)\n- [Merlin Szymanski](https://github.com/merszym)\n- [Pierre Lindenbaum](https://github.com/lindenb)\n- [Pontus Skoglund](https://github.com/pontussk)\n- [Raphael Eisenhofer](https://github.com/EisenRa)\n- [Roberta Davidson](https://github.com/roberta-davidson)\n- [Rodrigo Barquera](https://github.com/RodrigoBarquera)\n- [Selina Carlhoff](https://github.com/scarlhoff)\n- [Torsten G\u00fcnter](https://bitbucket.org/tguenther)\n\n## Contributions and Support\n\nIf you would like to 
contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#eager` channel](https://nfcore.slack.com/channels/eager) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/eager for your analysis, please cite it using the following doi:\n\n> Fellows Yates JA, Lamnidis TC, Borry M, Valtue\u00f1a Andrades A, Fagern\u00e4s Z, Clayton S, Garcia MU, Neukamm J, Peltzer A. 2021. Reproducible, portable, and efficient ancient genome reconstruction with nf-core/eager. PeerJ 9:e10947. DOI: [10.7717/peerj.10947](https://doi.org/10.7717/peerj.10947).\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/eager\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/eager)\n[![GitHub Actions CI Status](https://github.com/nf-core/eager/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/eager/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/eager/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/eager/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/eager/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1465061-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.1465061)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/eager)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23eager-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/eager)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?label=HiRSE&labelColor=005aa0&link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/eager** is a scalable and reproducible bioinformatics best-practise processing pipeline for genomic NGS sequencing data, with a focus on ancient DNA (aDNA) data. It is ideal for the (palaeo)genomic analysis of humans, animals, plants, microbes and even microbiomes.\n\n## Pipeline summary\n\n\n\n\n- (Optionally) create reference genome indices for mapping (`bwa`, `samtools`, and `picard`)\n- Sequencing quality control (`FastQC`, `Falco`)\n- Sequencing adapter removal, paired-end data merging (`AdapterRemoval`)\n- Read mapping to reference using (`bwa aln`, `bwa mem`, `CircularMapper`, `bowtie2`, or `mapAD`)\n- Post-mapping processing, statistics and conversion to bam (`samtools`, and `preseq`)\n- Ancient DNA C-to-T damage pattern visualisation (`DamageProfiler`)\n- PCR duplicate removal (`DeDup` or `MarkDuplicates`)\n- Post-mapping statistics and BAM quality control (`Qualimap`)\n- Library Complexity Estimation (`preseq`)\n- Overall pipeline statistics summaries (`MultiQC`)\n\n### Additional Steps\n\nAdditional functionality contained by the pipeline currently includes:\n\n#### Input\n\n- Automatic merging of complex sequencing setups (e.g. 
multiple lanes, sequencing configurations, library types)\n\n#### Preprocessing\n\n- Illumina two-coloured sequencer poly-G tail removal (`fastp`)\n- Post-AdapterRemoval trimming of FASTQ files prior mapping (`fastp`)\n- Automatic conversion of unmapped reads to FASTQ (`samtools`)\n- Host DNA (mapped reads) stripping from input FASTQ files (for sensitive samples)\n\n#### aDNA Damage manipulation\n\n- Damage removal/clipping for UDG+/UDG-half treatment protocols (`BamUtil`)\n- Damaged reads extraction and assessment (`PMDTools`)\n- Nuclear DNA contamination estimation of human samples (`angsd`)\n\n#### Genotyping\n\n- Creation of VCF genotyping files (`GATK UnifiedGenotyper`, `GATK HaplotypeCaller` and `FreeBayes`)\n- Creation of EIGENSTRAT genotyping files (`pileupCaller`)\n- Creation of Genotype Likelihood files (`angsd`)\n- Consensus sequence FASTA creation (`VCF2Genome`)\n- SNP Table generation (`MultiVCFAnalyzer`)\n\n#### Biological Information\n\n- Mitochondrial to Nuclear read ratio calculation (`MtNucRatioCalculator`)\n- Statistical sex determination of human individuals (`Sex.DetERRmine`)\n\n#### Metagenomic Screening\n\n- Low-sequenced complexity filtering (`BBduk` or `PRINSEQ++`)\n- Taxonomic binner with alignment (`MALT` or `MetaPhlAn 4`)\n- Taxonomic binner without alignment (`Kraken2`,`KrakenUniq`)\n- aDNA characteristic screening of taxonomically binned data from MALT (`MaltExtract`)\n\n#### Functionality Overview\n\nA graphical overview of suggested routes through the pipeline depending on context can be seen below.\n\n

\n \"nf-core/eager\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.tsv`:\n\n```csv\nample_id\tlibrary_id\tlane\tcolour_chemistry\tpairment\tstrandedness\tdamage_treatment\tr1\tr2\tbam\tbam_reference_id\nsample1\tsample1_a\t1\t4\tpaired\tdouble\tnone\t///sample1_a_l1_r1.fq.gz ///sample1_a_l1_r2.fq.gz\tNA\tNA\nsample2\tsample2_a\t2\t2\tsingle\tdouble\tfull\t///sample2_a_l1_r1.fq.gz\tNA\tNA\tNA\nsample3\tsample3_a\t8\t4\tsingle\tdouble\thalf\tNA\tNA\t///sample31_a.bam\tMammoth_MT_Krause\n```\n\nEach row represents a fastq file (single-end), pair of fastq files (paired end), and/or a bam file.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/eager \\\n -profile \\\n --input samplesheet.csv \\\n --fasta '.fasta' \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/eager/usage) and the [parameter documentation](https://nf-co.re/eager/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/eager/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/eager/output).\n\n## Credits\n\nThis pipeline was established by Alexander Peltzer ([apeltzer](https://github.com/apeltzer)) and [James A. Fellows Yates](https://github.com/jfy133). Version two had major contributions from [Stephen Clayton](https://github.com/sc13-bioinf), [Thiseas C. 
Lamnidis](https://github.com/TCLamnidis), [Maxime Borry](https://github.com/maxibor), [Zandra Fagern\u00e4s](https://github.com/ZandraFagernas), [Aida Andrades Valtue\u00f1a](https://github.com/aidaanva) and [Maxime Garcia](https://github.com/MaxUlysse) and the nf-core community.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Alex H\u00fcbner](https://github.com/alexhbnr)\n- [Alexandre Gilardet](https://github.com/alexandregilardet)\n- Arielle Munters\n- [\u00c5shild V\u00e5gene](https://github.com/ashildv)\n- [Charles Plessy](https://github.com/charles-plessy)\n- [Elina Salmela](https://github.com/esalmela)\n- [Fabian Lehmann](https://github.com/Lehmann-Fabian)\n- [He Yu](https://github.com/paulayu)\n- [Hester van Schalkwyk](https://github.com/hesterjvs)\n- [Ian Light-M\u00e1ka](https://github.com/ilight1542)\n- [Ido Bar](https://github.com/IdoBar)\n- [Irina Velsko](https://github.com/ivelsko)\n- [I\u015f\u0131n Alt\u0131nkaya](https://github.com/isinaltinkaya)\n- [Johan Nylander](https://github.com/nylander)\n- [Jonas Niemann](https://github.com/NiemannJ)\n- [Katerine Eaton](https://github.com/ktmeaton)\n- [Kathrin N\u00e4gele](https://github.com/KathrinNaegele)\n- [Kevin Lord](https://github.com/lordkev)\n- [Luc Venturini](https://github.com/lucventurini)\n- [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal)\n- [Marcel Keller](https://github.com/marcel-keller)\n- [Megan Michel](https://github.com/meganemichel)\n- [Merlin Szymanski](https://github.com/merszym)\n- [Pierre Lindenbaum](https://github.com/lindenb)\n- [Pontus Skoglund](https://github.com/pontussk)\n- [Raphael Eisenhofer](https://github.com/EisenRa)\n- [Roberta Davidson](https://github.com/roberta-davidson)\n- [Rodrigo Barquera](https://github.com/RodrigoBarquera)\n- [Selina Carlhoff](https://github.com/scarlhoff)\n- [Torsten G\u00fcnter](https://bitbucket.org/tguenther)\n\n## Contributions and Support\n\nIf you would like to 
contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#eager` channel](https://nfcore.slack.com/channels/eager) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/eager for your analysis, please cite it using the following doi:\n\n> Fellows Yates JA, Lamnidis TC, Borry M, Valtue\u00f1a Andrades A, Fagern\u00e4s Z, Clayton S, Garcia MU, Neukamm J, Peltzer A. 2021. Reproducible, portable, and efficient ancient genome reconstruction with nf-core/eager. PeerJ 9:e10947. DOI: [10.7717/peerj.10947](https://doi.org/10.7717/peerj.10947).\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/tests/test_full.nf.test b/tests/test_full.nf.test new file mode 100644 index 00000000..9efdc314 --- /dev/null +++ b/tests/test_full.nf.test @@ -0,0 +1,152 @@ +nextflow_pipeline { + + name "Test pipeline: NFCORE_EAGER" + script "main.nf" + tag "pipeline" + tag "nfcore_eager" + tag "test_full" // Tag containing the name of the profile to test. 
Should match the profile name below + profile "test_full" // The name of the profile used when testing + + test("Test `test_full` profile:") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + + /////////////////// + // DOCUMENTATION // + /////////////////// + + // The contents of each top level results directory should be tested with individually named snapshots. + // Within each snapshot, there should be two to three distinct variables, that contain the files to be tested. + // - stable_name_

is for files with variable md5sums (i.e. content) so only names will be compared + // - stable_content_ is for files with stable md5sums (i.e. content) so md5sums will be compared + // - bams_ is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable) + // If a directory is fully stable, you can drop `stable_name_*` + // If a directory contains no BAMs, you can drop `bams_*` + + // Due to the very long runtime of the full test, the snapshots were generated on the EVA computational cluster. + // Generate with: nf-test test --profile=+eva,archgen --tag test_full --update-snapshot + // Test with: nf-test test --profile=+eva,archgen --tag test_full + // NOTE: BAMs are always only stable in name, because: + // a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112) + // b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order) + // point b) also causes BAIs to be unstable. 
+ // c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes) + + ////////////////////// + // DEFINE VARIABLES // + ////////////////////// + + // Define exclusion patterns for files with unstable contents + // NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here + // This is particularly important if the patterns excluded in the stable content section should be included in the stable name section + def unstable_patterns_auth = [ + '**/mapped_reads_gc-content_distribution.txt', + '**/mapped_reads_nucleotide_content.txt', + '**/genome_gc_content_per_window.png', + '**/*.{svg,pdf,html,png}', + '**/DamageProfiler.log', + '**/3p_freq_misincorporations.txt', + '**/5p_freq_misincorporations.txt', + '**/DNA_comp_genome.txt', + '**/DNA_composition_sample.txt', + '**/misincorporation.txt', + '**/genome_results.txt', + '**/*command.log', + ] + + // Check that no files are missing/added + // Command legend: Result directory to index , includeDir: include dirs?, ignore: exclude patterns , ignoreFile: exclude pattern list , include: include patterns + def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] ) + + // Authentication + // def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] ) + // def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth) + + // // Deduplication + // def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + // def stable_name_deduplication = 
getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // // Final_bams + // def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + // def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // // Mapping (incl. bam_input flagstat) + // def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + // def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // // Preprocessing + // // NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing.
+ // def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'], ignoreFile: null , include: ['**/*'] ) + // def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] ) + + // // Read filtering + // def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) + // def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) + + // // Genotyping + // def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{tbi,vcf.gz}'] , ignoreFile: null , include: ['**/*'] ) + // def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.tbi'] ) + // // We need to collect the vcfs separately to run more specific md5sum checks on the header (contents are unstable due to same reasons as BAMs, explained above).
+ // def genotyping_vcfs = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.vcf.gz'] ) + + // // Metagenomics + // def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*.biom', '**/*table.tsv'] , ignoreFile: null , include: ['**/*'] ) + // def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.biom', '**/*table.tsv'] ) + + // MultiQC + // def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] ) + + /////////////////////// + // DEFINE ASSERTIONS // + /////////////////////// + + assertAll( + { assert workflow.success }, + // This checks that there are no missing or additional output files. + // Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections. + { assert snapshot( stable_name_all*.name ).match("all_files") }, + + // Checking changes to contents of each section + // NOTE: Keep the order of the sections in the alphanumeric order of the output directories. + // Each section should first check stable_content, stable_name second (if applicable). + // { assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") }, + // { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") }, + // { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") }, + // // NOTE: The snapshot section for mapping cannot be named 'mapping'.
See https://github.com/askimed/nf-test/issues/279 + // { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") }, + // { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") }, + // { assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") }, + // { assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") }, + // // Additional checks on the genotyping VCFs for content. Specifically the md5sums of the header FORMAT, INFO, FILTER, CONTIG lines, and sample names + // { assert snapshot( + // genotyping_vcfs.collect { + // file -> + // def vcf_head = path(file.toString()).vcf.header + // // The header contains lines in the "OTHER" category, which contain a timestamp and/or work dir paths, so we need to filter those out, then calculate md5sums. + // def header_md5 = [ + // vcf_head.getFormatHeaderLines().toString(), + // vcf_head.getInfoHeaderLines().toString(), + // vcf_head.getFilterLines().toString(), + // vcf_head.getIDHeaderLines().toString(), + // vcf_head.getGenotypeSamples().toString(), + // vcf_head.getContigLines().toString(), + // ].join(' ').md5() + // file.getName() + ":header_md5," + header_md5 + // } + // ).match("genotyping_vcfs")}, + // { assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") }, + // { assert snapshot( stable_name_multiqc*.name ).match("multiqc") }, + + // Versions + { assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() }, + + ) + } + } +} diff --git a/tests/test_full.nf.test.snap b/tests/test_full.nf.test.snap new file mode 100644 index 00000000..38457193 --- /dev/null +++ b/tests/test_full.nf.test.snap @@ -0,0 +1,77 @@ +{ + "all_files": { + "content": [ + [ + "COD076_COD076E1bL1_GCF_902167405.1_gadMor3.0_rna.c_curve.txt", + 
"COD076_COD076E1bL1_GCF_902167405.1_gadMor3.0_rna.command.log", + "COD092_COD092E1bL1i69_GCF_902167405.1_gadMor3.0_rna.c_curve.txt", + "COD092_COD092E1bL1i69_GCF_902167405.1_gadMor3.0_rna.command.log", + "COD076_COD076E1bL1_GCF_902167405.1_gadMor3.0_rna_sorted.bam", + "COD076_COD076E1bL1_GCF_902167405.1_gadMor3.0_rna_sorted.bam.bai", + "COD092_COD092E1bL1i69_GCF_902167405.1_gadMor3.0_rna_sorted.bam", + "COD092_COD092E1bL1i69_GCF_902167405.1_gadMor3.0_rna_sorted.bam.bai", + "COD076_COD076E1bL1_GCF_902167405.1_gadMor3.0_rna_sorted.flagstat", + "COD092_COD092E1bL1i69_GCF_902167405.1_gadMor3.0_rna_sorted.flagstat", + "COD076_COD076E1bL1_L1.fastp.html", + "COD076_COD076E1bL1_L1.fastp.json", + "COD076_COD076E1bL1_L1.fastp.log", + "COD076_COD076E1bL1_L6.fastp.html", + "COD076_COD076E1bL1_L6.fastp.json", + "COD076_COD076E1bL1_L6.fastp.log", + "COD076_COD076E1bL1_L8.fastp.html", + "COD076_COD076E1bL1_L8.fastp.json", + "COD076_COD076E1bL1_L8.fastp.log", + "COD092_COD092E1bL1i69_L6.fastp.html", + "COD092_COD092E1bL1i69_L6.fastp.json", + "COD092_COD092E1bL1i69_L6.fastp.log", + "COD092_COD092E1bL1i69_L7.fastp.html", + "COD092_COD092E1bL1i69_L7.fastp.json", + "COD092_COD092E1bL1i69_L7.fastp.log", + "COD092_COD092E1bL1i69_L8.fastp.html", + "COD092_COD092E1bL1i69_L8.fastp.json", + "COD092_COD092E1bL1i69_L8.fastp.log", + "COD076_COD076E1bL1_L1_fastqc.html", + "COD076_COD076E1bL1_L1_fastqc.zip", + "COD076_COD076E1bL1_L6_fastqc.html", + "COD076_COD076E1bL1_L6_fastqc.zip", + "COD076_COD076E1bL1_L8_fastqc.html", + "COD076_COD076E1bL1_L8_fastqc.zip", + "COD092_COD092E1bL1i69_L6_fastqc.html", + "COD092_COD092E1bL1i69_L6_fastqc.zip", + "COD092_COD092E1bL1i69_L7_fastqc.html", + "COD092_COD092E1bL1i69_L7_fastqc.zip", + "COD092_COD092E1bL1i69_L8_fastqc.html", + "COD092_COD092E1bL1i69_L8_fastqc.zip", + "COD076_COD076E1bL1_L1_1_fastqc.html", + "COD076_COD076E1bL1_L1_1_fastqc.zip", + "COD076_COD076E1bL1_L1_2_fastqc.html", + "COD076_COD076E1bL1_L1_2_fastqc.zip", + 
"COD076_COD076E1bL1_L6_1_fastqc.html", + "COD076_COD076E1bL1_L6_1_fastqc.zip", + "COD076_COD076E1bL1_L6_2_fastqc.html", + "COD076_COD076E1bL1_L6_2_fastqc.zip", + "COD076_COD076E1bL1_L8_1_fastqc.html", + "COD076_COD076E1bL1_L8_1_fastqc.zip", + "COD076_COD076E1bL1_L8_2_fastqc.html", + "COD076_COD076E1bL1_L8_2_fastqc.zip", + "COD092_COD092E1bL1i69_L6_1_fastqc.html", + "COD092_COD092E1bL1i69_L6_1_fastqc.zip", + "COD092_COD092E1bL1i69_L6_2_fastqc.html", + "COD092_COD092E1bL1i69_L6_2_fastqc.zip", + "COD092_COD092E1bL1i69_L7_1_fastqc.html", + "COD092_COD092E1bL1i69_L7_1_fastqc.zip", + "COD092_COD092E1bL1i69_L7_2_fastqc.html", + "COD092_COD092E1bL1i69_L7_2_fastqc.zip", + "COD092_COD092E1bL1i69_L8_1_fastqc.html", + "COD092_COD092E1bL1i69_L8_1_fastqc.zip", + "COD092_COD092E1bL1i69_L8_2_fastqc.html", + "COD092_COD092E1bL1i69_L8_2_fastqc.zip" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T04:03:22.466351835" + } +} \ No newline at end of file