Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions modules/local/freebayes.nf
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a future update could include running freebayes on only the target regions but if I recall we had some issues scaling that up and ended up running it on everything then filtering after.

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
process freebayes {
label 'process_high'
conda "bioconda::freebayes=1.3.6 bioconda::bcftools=1.21"
tag "$library"

input:
tuple val(library), path(bam), path(bai)
path(fasta)
path(fai)
val(target_bed)

output:
tuple val(library), path("*.vcf.gz"), path("*.vcf.gz.tbi"), emit: vcf
tuple val(library), path("*.flt.vcf.gz"), path("*.flt.vcf.gz.tbi"), emit: filtered_vcf

script:

"""
freebayes-parallel <(fasta_generate_regions.py ${fai} 100000) $task.cpus \\
-f $fasta \\
$bam \\
--min-base-quality 1 \\
> ${library}.vcf

cat ${library}.vcf | bcftools view -i 'QUAL > $params.freebayes_qual_filter' > ${library}.flt.vcf
bgzip ${library}.flt.vcf
tabix ${library}.flt.vcf.gz

bgzip ${library}.vcf
tabix ${library}.vcf.gz

"""
}
1 change: 0 additions & 1 deletion modules/local/happy_concordance.nf
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

going to default to outputting to the process name, so strelka and freebayes go to separate folders

Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
process happyConcordance {
label 'process_high'
conda "bioconda::hap.py=0.3.15"
publishDir "$params.outdir/happy/", mode: 'copy'
tag { library }

input:
Expand Down
26 changes: 1 addition & 25 deletions modules/local/parse_happy_vcf.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
process parseHappyVcf {
label 'process_single'
publishDir "$params.outdir/happy/", mode: 'copy'
tag { library }

input:
Expand Down Expand Up @@ -38,27 +37,4 @@ process parseHappyVcf {
done

"""
}
// echo "Count Ref Alt Type" > !{library}.happy.snp_mutations.txt

// if [[ -n "!{happy_bed}" ]]; then
// regions_opt='grep "CONF" |'
// else
// regions_opt=''
// fi

// zcat *.vcf.gz | grep "SNP" | $regions_opt grep -v NoPass | grep TP |\
// awk -v FS='\t' -v OFS=' ' '{print \$4, \$5, "Correct"}' \
// | sort | uniq -c |sed 's/^[ \\t]*//' >> !{library}.happy.snp_mutations.txt

// zcat *.vcf.gz | grep "SNP" | $regions_opt grep -v NoPass | grep FN | grep -v NOCALL |\
// awk -v FS='\t' -v OFS=' ' '{print \$4, \$5, "Incorrect"}' \
// | sort | uniq -c |sed 's/^[ \\t]*//' >> !{library}.happy.snp_mutations.txt

// zcat *.vcf.gz | grep "SNP" | $regions_opt grep -v NoPass | grep FN | grep NOCALL |\
// awk -v FS='\t' -v OFS=' ' '{print \$4, \$5, "Missed"}' \
// | sort | uniq -c |sed 's/^[ \\t]*//' >> !{library}.happy.snp_mutations.txt

// zcat *.vcf.gz | grep "SNP" | $regions_opt grep NoPass | grep TP |\
// awk -v FS='\t' -v OFS=' ' '{print \$4, \$5, "Missed"}' \
// | sort | uniq -c |sed 's/^[ \\t]*//' >> !{library}.happy.snp_mutations.txt
}
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ params {
outdir = 'results'
tracedir = "${params.outdir}/pipeline_info"
run_strelka = true
run_freebayes = true
freebayes_qual_filter = 15 // default quality filter for freebayes output.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

freebayes didn't have a standard quality filtering like strelka, so we used a basic quality filter to discard lots of false positive variants before running the concordance analysis.

run_happy = true
target_bed = null
happy_truth_vcf = null
Expand Down
16 changes: 11 additions & 5 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ nextflow_pipeline {
def calcmd_bam = path("${launchDir}/test_output/calcMD/200ng-V2-1.md.calcmd.bam").md5
def masked_bam = path("${launchDir}/test_output/revelio/200ng-V2-1.md.masked.bam").md5
def strelka_vcf = path("${launchDir}/test_output/strelka/200ng-V2-1.md.strelka_vcf.gz").vcf.summary
def happy_vcf = path("${launchDir}/test_output/happy/200ng-V2-1.md_vs_truth.vcf.happy.vcf.gz").vcf.summary
def happy_summary = path("${launchDir}/test_output/happy/200ng-V2-1.md_vs_truth.vcf.happy.summary.csv").text.tokenize('\n')
def snp_breakdown = path("${launchDir}/test_output/happy/200ng-V2-1.md.happy.snp_mutations.txt").text.tokenize('\n')
def freebayes_vcf = path("${launchDir}/test_output/freebayes/200ng-V2-1.md.flt.vcf.gz").vcf.summary
def happy_vcf_strelka = path("${launchDir}/test_output/happyConcordanceStrelka/200ng-V2-1.md_vs_truth.vcf.happy.vcf.gz").vcf.summary
def happy_summary_strelka = path("${launchDir}/test_output/happyConcordanceFreebayes/200ng-V2-1.md_vs_truth.vcf.happy.summary.csv").text.tokenize('\n')
def happy_vcf_freebayes = path("${launchDir}/test_output/happyConcordanceFreebayes/200ng-V2-1.md_vs_truth.vcf.happy.vcf.gz").vcf.summary
def happy_summary_freebayes = path("${launchDir}/test_output/happyConcordanceFreebayes/200ng-V2-1.md_vs_truth.vcf.happy.summary.csv").text.tokenize('\n')
def snp_breakdown = path("${launchDir}/test_output/parseHappyVcf/200ng-V2-1.md.happy.snp_mutations.txt").text.tokenize('\n')

assertAll(
{ assert workflow.success },
Expand All @@ -32,8 +35,11 @@ nextflow_pipeline {
["calcmd_bam", calcmd_bam],
["masked_bam", masked_bam],
["strelka_vcf", strelka_vcf],
["happy_vcf", happy_vcf],
["happy_summary", happy_summary],
["freebayes_vcf", freebayes_vcf],
["happy_vcf_strelka", happy_vcf_strelka],
["happy_summary_strelka", happy_summary_strelka],
["happy_vcf_freebayes", happy_vcf_freebayes],
["happy_summary_freebayes", happy_summary_freebayes],
["snp_breakdown", snp_breakdown]
).match()
}
Expand Down
34 changes: 27 additions & 7 deletions tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"content": [
{
"tasksFailed": 0,
"tasksCount": 11,
"tasksSucceeded": 11
"tasksCount": 15,
"tasksSucceeded": 15
},
[
"calcmd_bam",
Expand All @@ -19,15 +19,35 @@
"VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=23, phased=false, phasedAutodetect=false]"
],
[
"happy_vcf",
"freebayes_vcf",
"VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=11, phased=false, phasedAutodetect=false]"
],
[
"happy_vcf_strelka",
"VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=23, phased=false, phasedAutodetect=false]"
],
[
"happy_summary",
"happy_summary_strelka",
[
"Type,Filter,TRUTH.TOTAL,TRUTH.TP,TRUTH.FN,QUERY.TOTAL,QUERY.FP,QUERY.UNK,FP.gt,FP.al,METRIC.Recall,METRIC.Precision,METRIC.Frac_NA,METRIC.F1_Score,TRUTH.TOTAL.TiTv_ratio,QUERY.TOTAL.TiTv_ratio,TRUTH.TOTAL.het_hom_ratio,QUERY.TOTAL.het_hom_ratio",
"INDEL,ALL,0,0,0,2,2,0,0,1,0.0,0.0,0.0,,,,,",
"INDEL,PASS,0,0,0,2,2,0,0,1,0.0,0.0,0.0,,,,,",
"SNP,ALL,1,1,0,14,13,0,0,1,1.0,0.07142899999999999,0.0,0.13333299999999998,0.0,0.75,0.0,1.3333333333333333",
"SNP,PASS,1,1,0,14,13,0,0,1,1.0,0.07142899999999999,0.0,0.13333299999999998,0.0,0.75,0.0,1.3333333333333333"
]
],
[
"happy_vcf_freebayes",
"VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=16, phased=false, phasedAutodetect=false]"
],
[
"happy_summary_freebayes",
[
"Type,Filter,TRUTH.TOTAL,TRUTH.TP,TRUTH.FN,QUERY.TOTAL,QUERY.FP,QUERY.UNK,FP.gt,FP.al,METRIC.Recall,METRIC.Precision,METRIC.Frac_NA,METRIC.F1_Score,TRUTH.TOTAL.TiTv_ratio,QUERY.TOTAL.TiTv_ratio,TRUTH.TOTAL.het_hom_ratio,QUERY.TOTAL.het_hom_ratio",
"SNP,ALL,1,1,0,23,22,0,0,0,1.0,0.043477999999999996,0.0,0.083333,0.0,0.5333333333333333,0.0,22.0",
"SNP,PASS,1,1,0,4,3,0,0,0,1.0,0.25,0.0,0.4,0.0,0.0,0.0,3.0"
"INDEL,ALL,0,0,0,2,2,0,0,1,0.0,0.0,0.0,,,,,",
"INDEL,PASS,0,0,0,2,2,0,0,1,0.0,0.0,0.0,,,,,",
"SNP,ALL,1,1,0,14,13,0,0,1,1.0,0.07142899999999999,0.0,0.13333299999999998,0.0,0.75,0.0,1.3333333333333333",
"SNP,PASS,1,1,0,14,13,0,0,1,1.0,0.07142899999999999,0.0,0.13333299999999998,0.0,0.75,0.0,1.3333333333333333"
]
],
[
Expand All @@ -42,6 +62,6 @@
"nf-test": "0.9.0",
"nextflow": "24.04.2"
},
"timestamp": "2025-04-15T11:26:45.336212952"
"timestamp": "2025-04-17T16:25:23.194977669"
}
}
36 changes: 32 additions & 4 deletions workflows/emseq_variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ include { downloadRevelio } from '../modules/local/download_revelio.nf'
include { calcMD } from '../modules/local/calc_md.nf'
include { revelio } from '../modules/local/revelio.nf'
include { strelka } from '../modules/local/strelka.nf'
include { happyConcordance } from '../modules/local/happy_concordance.nf'
include { freebayes } from '../modules/local/freebayes.nf'
include { happyConcordance as happyConcordanceStrelka;
happyConcordance as happyConcordanceFreebayes } from '../modules/local/happy_concordance.nf'
include { parseHappyVcf } from '../modules/local/parse_happy_vcf.nf'

workflow emseq_variant_calling {
Expand Down Expand Up @@ -72,13 +74,13 @@ workflow emseq_variant_calling {
fai,
target_bed // call regions (optional)
)

//
// Module: Run hap.py to compare variants to a "truth" vcf
//
if (params.run_happy) {

happyConcordance(
happyConcordanceStrelka(
strelka.out.vcf,
happy_bed, // comparison regions (optional)
fasta,
Expand All @@ -91,13 +93,39 @@ workflow emseq_variant_calling {
// Module: Parse hap.py vcf output to break down SNPs by ref/alt bases.
//
parseHappyVcf(
happyConcordance.out.vcf,
happyConcordanceStrelka.out.vcf,
happy_bed // analyze breakdown in called regions if provided
)
}

}

//
// Module: Run freebayes to call germline variants
//
if (params.run_freebayes) {
freebayes (
revelio.out.masked,
fasta,
fai,
target_bed // call regions (optional)
)

//
// Module: Run hap.py to compare variants to a "truth" vcf
//
if (params.run_happy) {

happyConcordanceFreebayes(
freebayes.out.filtered_vcf,
happy_bed, // comparison regions (optional)
fasta,
fai,
happy_truth_vcf,
happy_truth_tbi
)
}
}
}

workflow {
Expand Down
Loading