Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
587266f
adding mva to consensus sequence workflow
aidaanva Mar 7, 2025
7086813
mva executing
aidaanva Mar 28, 2025
6c6ec9e
moved channel manipulation to subworkflow
aidaanva Apr 4, 2025
0f60625
added bgzip, reverted gatk3
aidaanva Apr 4, 2025
4e1f5a3
adding vcfs in input sheet
aidaanva May 2, 2025
ac50fc0
Merge branch 'dev' into mva
aidaanva May 2, 2025
a828f00
removed additional vcfs from multiref_indexing
aidaanva May 2, 2025
d7bb376
Merge branch 'mva' of github.com:aidaanva/eager into mva
aidaanva May 2, 2025
3df49f1
Merge branch 'mva' of github.com:aidaanva/eager into mva
aidaanva May 2, 2025
fe8dd1d
remove unnecessary files
aidaanva May 2, 2025
4ac34ce
latest changes
aidaanva May 9, 2025
a0a5c37
Fix join bug, uncomment consensus sequence to continue debugging
jfy133 May 9, 2025
1c37caa
Merge branch 'dev' into mva
aidaanva Aug 22, 2025
4021d21
adding testing
aidaanva Feb 13, 2026
dd81b54
Merge branch 'dev' into mva
TCLamnidis Feb 13, 2026
b2de835
Merge pull request #1142 from aidaanva/mva
TCLamnidis Feb 13, 2026
97542e7
linting
TCLamnidis Feb 13, 2026
5086423
fix warning sign emojis. again...
TCLamnidis Feb 13, 2026
1717b0d
remove on-the-fly module calls
TCLamnidis Feb 13, 2026
f4bd73b
update easter egg formatting. just prettier
TCLamnidis Feb 20, 2026
da2e4f7
add notes and todos. remove potentially unnecessary variable
TCLamnidis Feb 20, 2026
4f31b41
update MVCFA to 0.88.1
TCLamnidis Mar 13, 2026
cc1c698
add consesus sequence input ifEmpty when no VCFs provided in samplesheet
TCLamnidis Mar 17, 2026
bbbcafa
remove deprecated option
TCLamnidis Mar 17, 2026
47f6c19
managed to stitch MVCFA in
TCLamnidis Mar 17, 2026
0e62768
add comment code
TCLamnidis Mar 17, 2026
4ecaa88
update rocrate
TCLamnidis Mar 17, 2026
0a9d3a2
update manual_test for mvcfa
TCLamnidis Mar 18, 2026
2df5feb
remove debug println
TCLamnidis Mar 18, 2026
b1bb773
fix multiref mva files
TCLamnidis Mar 18, 2026
bba8e60
mva outputs in tool subdir. also publish fastas
TCLamnidis Mar 18, 2026
c17a068
implement mva params in fasta_schema
TCLamnidis Mar 18, 2026
e2f3505
update manual tests for mva
TCLamnidis Mar 18, 2026
09a511d
update multivcfanalyzer module
TCLamnidis Mar 18, 2026
09ed479
linting
TCLamnidis Mar 18, 2026
5587263
linting
TCLamnidis Mar 20, 2026
b56e299
fix easter egg patch
TCLamnidis Mar 26, 2026
d26b177
add multivcfanalyzer to test profile
TCLamnidis Mar 26, 2026
827f35c
added proper testing of consensus sequence output
TCLamnidis Mar 26, 2026
d6a6dd9
unstable content in MVA outputs
TCLamnidis Mar 27, 2026
1dad2d6
add MVA output explanation
TCLamnidis Apr 17, 2026
4b70956
prettier
TCLamnidis Apr 17, 2026
741dd73
Apply suggestions from code review
TCLamnidis Apr 17, 2026
73468ce
add parameter validation checks for mva
TCLamnidis Apr 17, 2026
3d4a512
move MVA downstream inputs to data output folder
TCLamnidis Apr 17, 2026
8497c6c
update manual tests.
TCLamnidis Apr 17, 2026
d867c89
MVA: move downstream input tables to data from stats
TCLamnidis Apr 17, 2026
8f77163
Merge branch 'mva' of github.com:nf-core/eager into mva
TCLamnidis Apr 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions assets/schema_fasta.json
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,27 @@
"pattern": "^\\S+\\.vcf$",
"exists": true,
"errorMessage": "SNP annotation files for GATK must not contain any spaces and have file extension '.vcf'."
},
"consensus_multivcfanalyzer_reference_gff_annotations": {
"type": "string",
"format": "file-path",
"pattern": "^\\S+\\.gff(\\.gz)?$",
"exists": true,
"errorMessage": "GFF annotation files for MultiVCFAnalyzer must not contain any spaces and have file extensions '.gff' or '.gff.gz'."
},
"consensus_multivcfanalyzer_reference_gff_exclude": {
"type": "string",
"format": "file-path",
"pattern": "^\\S+\\.bed(\\.gz)?$",
"exists": true,
"errorMessage": "BED files for exclusion in MultiVCFAnalyzer must not contain any spaces and have file extensions '.bed' or '.bed.gz'."
},
"consensus_multivcfanalyzer_reference_snpeff_results": {
"type": "string",
"format": "file-path",
"pattern": "^\\S+\\.txt(\\.gz)?$",
"exists": true,
"errorMessage": "SNPeff results files for MultiVCFAnalyzer must not contain any spaces and have file extensions '.txt' or '.txt.gz'."
}
},
"required": ["reference_name", "fasta"],
Expand Down
22 changes: 20 additions & 2 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@
"type": "string",
"meta": ["bam_reference_id"],
"errorMessage": "A BAM reference ID (corresponding to what is supplied to `--fasta`) must always be provided when providing a BAM file."
},
"vcf": {
"type": "string",
"format": "file-path",
"pattern": "^\\S+\\.vcf\\.gz$",
"exists": true,
"errorMessage": "VCF files cannot contain any spaces, must have the extension '.vcf.gz', and must be gzip compressed."
},
"vcf_reference_id": {
"type": "string",
"meta": ["vcf_reference_id"],
"errorMessage": "A VCF reference ID (corresponding to what is supplied to `--fasta`) must always be provided when providing a VCF file."
}
},
"required": [
Expand All @@ -90,18 +102,24 @@
},
{
"required": ["bam"]
},
{
"required": ["vcf"]
}
],
"dependentRequired": {
"r2": ["r1"],
"bam": ["bam_reference_id"],
"bam_reference_id": ["bam"]
"bam_reference_id": ["bam"],
"vcf": ["vcf_reference_id"],
"vcf_reference_id": ["vcf"]
}
},
"allOf": [
{ "uniqueEntries": ["lane", "library_id"] },
{ "uniqueEntries": "r1" },
{ "uniqueEntries": "r2" },
{ "uniqueEntries": "bam" }
{ "uniqueEntries": "bam" },
{ "uniqueEntries": "vcf" }
]
}
38 changes: 38 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -1757,4 +1757,42 @@ process {
]
]
}

withName: UG_BGZIP {
tag = { "${meta.reference}|${meta.sample_id}" }
ext.args = "-d"
}

withName: REF_MVA_GUNZIP {
tag = { "${meta.reference}" }
ext.args = "-d"
}

withName: MULTIVCFANALYZER {
tag = { "${meta.reference}" }
ext.prefix = { "multivcfanalyzer_${meta.reference}" }
publishDir = [
[
// data
Comment thread
TCLamnidis marked this conversation as resolved.
path: { "${params.outdir}/consensus_sequence/multivcfanalyzer/data/" },
mode: params.publish_dir_mode,
enabled: true,
pattern: '*.fasta.gz'
],
[
// data
path: { "${params.outdir}/consensus_sequence/multivcfanalyzer/data/" },
mode: params.publish_dir_mode,
enabled: true,
pattern: '*{snpTable,structure}*.tsv'
],
[
// stats
path: { "${params.outdir}/consensus_sequence/multivcfanalyzer/stats/" },
mode: params.publish_dir_mode,
enabled: true,
pattern: '*{.txt,snpStatistics.tsv}'
]
]
}
}
5 changes: 5 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,9 @@ params {
metagenomics_profiling_tool = 'metaphlan'
metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/metaphlan/metaphlan4_database.tar.gz'
metagenomics_run_postprocessing = true

// Consensus sequence generation
run_consensus_sequence = true
consensus_tool = 'multivcfanalyzer'
consensus_multivcfanalyzer_write_allele_frequencies = true
}
18 changes: 18 additions & 0 deletions docs/development/manual_tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -1123,3 +1123,21 @@ nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume --
## Expect: BAM input shows up in FastQC -> mapping results.
nextflow run main.nf -profile test,docker --outdir ./results -w work/ --convert_inputbam --skip_deduplication -resume -ansi-log false -dump-channels
```

# MultiVCFAnalyzer

Based on GATK_UG test, but with added consensus sequence.

```bash
## Gatk UG on raw reads
## Expect: MVCFA runs. Results include three fasta.gz files, three snpTable TSV files and two structureGenotypes TSV files in the data section, and two tsv/txt files in the stats section of the results.
nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --genotyping_gatk_ug_keeprealignbam -ansi-log false -dump-channels --run_consensus_sequence --consensus_tool 'multivcfanalyzer'
```

Based on test_microbial, but forcing GATK_UG genotyping. Multi Reference.

```bash
## Gatk UG on raw reads. Use BWA because circularmapper runs into issues with BWA, due to difference in MT chrom length. Custom input to test multiple samples/lanes.
## Expect: MVCFA runs. Two sets of outputs, one per reference. MVCFA runs into potential out of memory issues with the multi contig reference hs37d5_chr22-MT, so I had to run this on an HPC, with increased resources.
nextflow run main.nf -profile test_microbial,eva_grace --outdir ./results -w work/ -resume --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --genotyping_gatk_ug_keeprealignbam -ansi-log false -dump-channels --run_consensus_sequence --consensus_tool 'multivcfanalyzer' --mapping_tool 'bwaaln' --input "https://github.com/nf-core/test-datasets/raw/refs/heads/eager/testdata/Mammoth/samplesheet_v3.tsv" --deduplication_tool 'markduplicates'
```
25 changes: 25 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -752,3 +752,28 @@ When using pileupCaller for genotyping, single-stranded and double-stranded libr
</details>

[ANGSD](http://www.popgen.dk/angsd/index.php/ANGSD) is a software package for analyzing next-generation sequencing data. It can estimate genotype likelihoods and allele frequencies from such data. The output provided is a bgzipped genotype likelihood file, containing likelihoods across all samples per reference. Users can specify the model used for genotype likelihood estimation, as well as the output format. For more information on the available options, see the [ANGSD genotype likelihoods documentation](https://www.popgen.dk/angsd/index.php/Genotype_Likelihoods).

### Consensus calling

#### MultiVCFAnalyzer

<details markdown="1">
<summary>Output files</summary>

- `consensus_sequence/multivcfanalyzer/`
- `data/`
- `*fullAlignment.fasta.gz`: FASTA file of all positions contained in the VCF files i.e. including ref calls
- `*snpAlignment.fasta.gz`: FASTA file of only SNP positions including only the calls of the samples.
- `*snpAlignmentIncludingRefGenome.fasta.gz`: FASTA file of just SNP positions including reference genome calls.
- `*snpTableForSnpEff.tsv`: Input file for SnpEff.
- `*snpTable.tsv`: SNP table of combined positions taken from each VCF file, in TSV format.
- `*snpTableWithUncertaintyCalls.tsv`: SNP table of combined positions taken from each VCF file, in TSV format, but with lower case characters indicating uncertain calls.
- `*structureGenotypes_noMissingData-Columns.tsv`: Alternate input file for STRUCTURE.
- `*structureGenotypes.tsv`: Input file for STRUCTURE.
- `stats/`
- `*info.txt`: File with information about the run.
- `*snpStatistics.tsv`: File containing basic statistics about the SNP calls of each sample.

</details>

[MultiVCFAnalyzer](https://github.com/alexherbig/MultiVCFAnalyzer) is a SNP filtering and SNP alignment generation tool, designed around (but not limited to) low-coverage ancient DNA data. MultiVCFAnalyzer reads multiple VCF files as produced by GATK UnifiedGenotyper, performs filtering based on a number of criteria, and provides the combined genotype calls in a number of formats that are suitable for follow-up analyses such as phylogenetic reconstruction, SNP effect analyses, and population genetic analyses. Furthermore, the results are provided in the form of various tables for manual inspection and presentation/publication purposes.
5 changes: 4 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ workflow NFCORE_EAGER {
take:
samplesheet_fastqs // channel: samplesheet read in from --input
samplesheet_bams
samplesheet_vcfs

main:

Expand All @@ -53,7 +54,8 @@ workflow NFCORE_EAGER {
//
EAGER (
samplesheet_fastqs,
samplesheet_bams
samplesheet_bams,
samplesheet_vcfs
)
emit:
multiqc_report = EAGER.out.multiqc_report // channel: /path/to/multiqc_report.html
Expand Down Expand Up @@ -88,6 +90,7 @@ workflow {
NFCORE_EAGER (
PIPELINE_INITIALISATION.out.samplesheet_fastqs,
PIPELINE_INITIALISATION.out.samplesheet_bams,
PIPELINE_INITIALISATION.out.samplesheet_vcfs
)
//
// SUBWORKFLOW: Run completion tasks
Expand Down
10 changes: 10 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@
"git_sha": "af27af1be706e6a2bb8fe454175b0cdf77f47b49",
"installed_by": ["modules"]
},
"multivcfanalyzer": {
"branch": "master",
"git_sha": "de26d9dc50ba49a6ffaac64d473f784ab97a9d61",
"installed_by": ["modules"]
},
"picard/createsequencedictionary": {
"branch": "master",
"git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447",
Expand Down Expand Up @@ -340,6 +345,11 @@
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
},
"tabix/bgzip": {
"branch": "master",
"git_sha": "05954dab2ff481bcb999f24455da29a5828af08d",
"installed_by": ["modules"]
},
"taxpasta/merge": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
Expand Down
8 changes: 8 additions & 0 deletions modules/nf-core/multivcfanalyzer/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

87 changes: 87 additions & 0 deletions modules/nf-core/multivcfanalyzer/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading