Skip to content

Commit ab7ebd5

Browse files
committed
WIP
1 parent 248545e commit ab7ebd5

9 files changed

Lines changed: 180 additions & 1 deletion

File tree

.vscode/settings.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
{
2-
"markdown.styles": ["public/vscode_markdown.css"]
2+
"markdown.styles": [
3+
"public/vscode_markdown.css"
4+
],
5+
"nextflow.telemetry.enabled": true
36
}

conf/modules.config

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1846,4 +1846,23 @@ process {
18461846
]
18471847
]
18481848
}
1849+
//
1850+
// mtDNA haplogroup classification
1851+
//
1852+
withName: HAPLOGREP3_CLASSIFY_MTDNA { // settings for the process alias HAPLOGREP3_CLASSIFY_MTDNA
1853+
tag = { "${meta.reference}|${meta.sample_id}" }
1854+
ext.prefix = { "${meta.sample_id}_${meta.reference}" } // prefix built from sample ID and reference
1855+
publishDir = [
1856+
[
1857+
path: { "${params.outdir}/mtdna_haplogroup/${meta.reference}/${meta.sample_id}" }, // one directory per reference and sample
1858+
mode: params.publish_dir_mode,
1859+
saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // null for versions.yml so that file is not published
1860+
]
1861+
]
1862+
}
1863+
1864+
// Add configuration for HAPLOGREP3_CLASSIFY in HAPLOTYPE_HUMAN_MTDNA
1865+
withName: 'NFCORE_EAGER:EAGER:HAPLOTYPE_HUMAN_MTDNA:HAPLOGREP3_CLASSIFY' { // fully-qualified selector; this block sets only ext.args
1866+
ext.args = '--tree phylotree-fu-rcrs@1.2' // selects the tree phylotree-fu-rcrs@1.2 via the --tree option
1867+
}
18491868
}

docs/development/manual_tests.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1133,3 +1133,23 @@ nextflow run main.nf -profile test,docker --outdir ./results -w work/ -resume --
11331133
## Expect: BAM input shows up in FastQC -> mapping results.
11341134
nextflow run main.nf -profile test,docker --outdir ./results -w work/ --convert_inputbam --skip_deduplication -resume -ansi-log false -dump-channels
11351135
```
1136+
1137+
### MTDNA HAPLOGROUP CLASSIFICATION
1138+
1139+
```bash
1140+
#### MTDNA HAPLOGROUP CLASSIFICATION with default settings
1141+
## Expect: Directory created 'mtdna_haplogroup/<reference>/<sample_id>' containing a .txt file for each sample with haplogroup assignments
1142+
## Expect: The haplogroup .txt file contains at minimum columns for rank, name, quality, range, and details of the haplogroup assignment
1143+
nextflow run main.nf -profile docker,test --outdir ./results/mtdna_haplogroup_test --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --run_mtdna_haplogroup -resume
1144+
1145+
#### MTDNA HAPLOGROUP CLASSIFICATION with specific arguments
1146+
## Expect: Directory created 'mtdna_haplogroup/<reference>/<sample_id>' containing a .txt file for each sample with haplogroup assignments
1147+
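## Expect: The haplogroup assignment may differ based on the classification settings
## NOTE: --haplogrep_args is not yet declared in nextflow.config or nextflow_schema.json; declare it and pass it to the module's ext.args before relying on this test.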
1148+
nextflow run main.nf -profile docker,test --outdir ./results/mtdna_haplogroup_test_args --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --run_mtdna_haplogroup --haplogrep_args '--extend-report' -resume
1149+
1150+
#### MTDNA HAPLOGROUP CLASSIFICATION with custom VCF input
1151+
## Use as input a version of the TSV that contains mitochondrial VCF files
1152+
## Expect: Directory created 'mtdna_haplogroup/<reference>/<sample_id>' containing a .txt file for each sample with haplogroup assignments
1153+
## Expect: The haplogroup assignment should reflect the variants in the input VCF files
1154+
nextflow run main.nf -profile docker,test --input ~/eager_dsl2_testing/input/mtdna/mtdna_vcf_samples.tsv --outdir ./results/mtdna_haplogroup_vcf_test --run_genotyping --genotyping_tool 'ug' --genotyping_source 'raw' --run_mtdna_haplogroup -resume
1155+
```

modules.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,11 @@
180180
"git_sha": "3a5fef109d113b4997c9822198664ca5f2716208",
181181
"installed_by": ["modules"]
182182
},
183+
"haplogrep3/classify": {
184+
"branch": "master",
185+
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
186+
"installed_by": ["modules"]
187+
},
183188
"kraken2/kraken2": {
184189
"branch": "master",
185190
"git_sha": "653218e79ffa76fde20319e9062f8b8da5cf7555",

nextflow.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,9 @@ params {
249249
run_sexdeterrmine = false
250250
sexdeterrmine_bedfile = null
251251

252+
// mtDNA haplogroup classification
253+
run_mtdna_haplogroup = false
254+
252255
// Genotyping
253256
run_genotyping = false
254257
genotyping_tool = null

nextflow_schema.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,21 @@
15901590
},
15911591
"fa_icon": "fas fa-transgender-alt",
15921592
"help_text": ""
1593+
},
1594+
"mtdna_haplogroup_options": {
1595+
"title": "mtDNA Haplogroup Classification",
1596+
"type": "object",
1597+
"description": "Options for classifying mitochondrial haplogroups using Haplogrep3.",
1598+
"default": "",
1599+
"fa_icon": "fas fa-dna",
1600+
"properties": {
1601+
"run_mtdna_haplogroup": {
1602+
"type": "boolean",
1603+
"description": "Run Haplogrep3 to determine mitochondrial haplogroups from VCF files.",
1604+
"fa_icon": "fas fa-dna",
1605+
"default": false
1606+
}
1607+
}
15931608
}
15941609
},
15951610
"allOf": [
@@ -1646,6 +1661,9 @@
16461661
},
16471662
{
16481663
"$ref": "#/$defs/human_sex_determination"
1664+
},
1665+
{
1666+
"$ref": "#/$defs/mtdna_haplogroup_options"
16491667
}
16501668
]
16511669
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//
2+
// Classify mtDNA haplogroups with Haplogrep3
3+
//
4+
5+
include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main'
6+
7+
include { HAPLOGREP3_CLASSIFY as HAPLOGREP3_CLASSIFY_MTDNA } from '../../modules/nf-core/haplogrep3/classify/main'
8+
9+
workflow CLASSIFY_MTDNA_HAPLOGROUP {
10+
11+
take:
12+
mtdna_haplogroup_bam // channel: [ val(meta), [ bam ], [ bai ] ]
13+
14+
main:
15+
ch_versions = Channel.empty()
16+
ch_multiqc_files = Channel.empty() // NOTE(review): never mixed into or emitted below
17+
ch_haplogroups = Channel.empty()
18+
19+
if ( params.run_mtdna_haplogroup ) { // when false, the empty channels above are emitted unchanged
20+
// Prepare input for haplogrep3
21+
// The module requires a tuple with [meta, inputfile]
22+
// The input channel provides [meta, bam, bai]; the bai is dropped below
23+
24+
ch_input_haplogrep3 = mtdna_haplogroup_bam
25+
.map {
26+
// Prepend a new meta that carries meta.reference as new_meta.reference. NOTE(review): if the original meta is kept after the prepended one, the tuple has four elements and the three-argument map below will not match - confirm.
27+
addNewMetaFromAttributes( it, "reference" , "reference" , false )
28+
}
29+
.map { meta, bam, bai ->
30+
[meta, bam] // NOTE(review): a BAM is passed as the module input file - confirm haplogrep3 classify accepts this
31+
}
32+
33+
// Run mtDNA haplogroup classification with haplogrep3 and collect its txt and versions outputs
34+
HAPLOGREP3_CLASSIFY_MTDNA(ch_input_haplogrep3)
35+
ch_haplogroups = HAPLOGREP3_CLASSIFY_MTDNA.out.txt
36+
ch_versions = ch_versions.mix(HAPLOGREP3_CLASSIFY_MTDNA.out.versions)
37+
}
38+
39+
emit:
40+
haplogroups = ch_haplogroups // channel: [ val(meta), path("*.txt") ]; empty when the classification is not run
41+
versions = ch_versions // channel: path(versions.yml)
42+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
2+
// https://github.com/nf-core/modules/tree/master/subworkflows
3+
// You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
4+
// https://nf-co.re/join
5+
// TODO nf-core: A subworkflow SHOULD import at least two modules
6+
7+
include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main'
8+
include { HAPLOGREP3_CLASSIFY } from '../../modules/nf-core/haplogrep3/classify/main'
9+
10+
workflow HAPLOTYPE_HUMAN_MTDNA {
11+
12+
take:
13+
ch_mtdna_vcf // channel: [ val(meta), [ vcf ] ]
14+
15+
main:
16+
ch_versions = Channel.empty()
17+
ch_haplogroups = Channel.empty() // replaced below by the module's txt output
18+
19+
// Prepare input for haplogrep3
20+
// The module requires a tuple with [meta, inputfile]; the input channel is passed through unchanged
21+
ch_input_haplogrep3 = ch_mtdna_vcf
22+
23+
// Run mtDNA haplogroup classification with haplogrep3 and collect its txt and versions outputs
24+
HAPLOGREP3_CLASSIFY(ch_input_haplogrep3)
25+
ch_haplogroups = HAPLOGREP3_CLASSIFY.out.txt
26+
ch_versions = ch_versions.mix(HAPLOGREP3_CLASSIFY.out.versions)
27+
28+
emit:
29+
haplogroups = ch_haplogroups // channel: [ val(meta), path("*.txt") ]
30+
versions = ch_versions // channel: path(versions.yml)
31+
}

workflows/eager.nf

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ include { METAGENOMICS } from '../subwork
3232
include { ESTIMATE_CONTAMINATION } from '../subworkflows/local/estimate_contamination'
3333
include { CALCULATE_DAMAGE } from '../subworkflows/local/calculate_damage'
3434
include { RUN_SEXDETERRMINE } from '../subworkflows/local/run_sex_determination'
35+
include { HAPLOTYPE_HUMAN_MTDNA } from '../subworkflows/local/haplotype_human_mtdna'
3536
include { MERGE_LIBRARIES } from '../subworkflows/local/merge_libraries'
3637
include { MERGE_LIBRARIES as MERGE_LIBRARIES_GENOTYPING } from '../subworkflows/local/merge_libraries'
3738
include { GENOTYPE } from '../subworkflows/local/genotype'
@@ -560,6 +561,43 @@ workflow EAGER {
560561
ch_multiqc_files = ch_multiqc_files.mix(GENOTYPE.out.mqc.collect { it[1] }.ifEmpty([]))
561562
}
562563

564+
//
565+
// SUBWORKFLOW: Run mtDNA Haplogroup Classification
566+
//
567+
568+
if (params.run_mtdna_haplogroup) {
569+
// Ensure genotyping has run, as we need its VCF output
570+
if (!params.run_genotyping) {
571+
error "Cannot run mtDNA haplogroup classification (--run_mtdna_haplogroup) without running genotyping (--run_genotyping). VCF files are required as input."
572+
}
573+
574+
// Filter the VCFs from genotyping to only include those matching the mitochondrial header
575+
ch_mito_header_for_filter = REFERENCE_INDEXING.out.mitochondrion_header
576+
.map { meta, header -> [ meta.id, header ] }
577+
578+
ch_mtdna_haplogroup_input = GENOTYPE.out.vcf
579+
.map { meta, vcf, tbi ->
580+
// Need meta.reference to filter based on mito header
581+
def reference_id = meta.reference
582+
[ reference_id, meta, vcf ]
583+
}
584+
.join(ch_mito_header_for_filter) // Join by reference_id
585+
.filter { ref_id, meta, vcf, mito_header ->
586+
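// Intended: keep only VCFs whose reference matches the expected mitochondrial header. NOTE: mito_header is not used yet; the placeholder check below only tests that the VCF file name contains meta.id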
587+
// This assumes mito_header contains the specific contig name for mtDNA
588+
// We might need a more robust check depending on mito_header content
589+
vcf.name.contains(meta.id) // Basic check if VCF name includes sample ID - adjust filter logic as needed
590+
// TODO: Refine filter logic based on actual mito_header content and VCF naming conventions
591+
}
592+
.map { ref_id, meta, vcf, mito_header ->
593+
// Reformat to the expected [meta, vcf] structure for the subworkflow
594+
[ meta, vcf ]
595+
}
596+
597+
HAPLOTYPE_HUMAN_MTDNA(ch_mtdna_haplogroup_input)
598+
ch_versions = ch_versions.mix(HAPLOTYPE_HUMAN_MTDNA.out.versions)
599+
}
600+
563601
//
564602
// Collate and save software versions
565603
//

0 commit comments

Comments
 (0)