Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified doc/plots/iGenVar_only/results.DEL.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/plots/iGenVar_only/results.INS.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/plots/iGenVar_only/results.all.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/plots/iGenVar_only/results.no_DUP_and_INV.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
sample = config["parameters"]["sample"],
sample_HG002 = config["parameters"]["sample_HG002"],
sample_NA12878 = config["parameters"]["sample_NA12878"],
min_var_length = config["parameters"]["min_var_length"],
max_var_length = config["parameters"]["max_var_length"]

Expand All @@ -13,111 +14,149 @@ rule run_iGenVar:
short_bam = config["short_read_bam"]["s1"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S2': # Illumina Mate Pair
short_bam = config["short_read_bam"]["s2"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S3': # Illumina HiSeq
short_bam = config["short_read_bam"]["s3"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S4': # NA12878 WGS GRCh38
short_bam = config["short_read_bam"]["s4"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \
--output {output.vcf} --vcf_sample_name {sample_NA12878} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S1L1': # Illumina Paired End & MtSinai PacBio
short_bam = config["short_read_bam"]["s1"],
long_bam = config["long_read_bam"]["l1"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S2L1': # Illumina Mate Pair & MtSinai PacBio
short_bam = config["short_read_bam"]["s2"],
long_bam = config["long_read_bam"]["l1"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S3L1': # Illumina HiSeq & MtSinai PacBio
short_bam = config["short_read_bam"]["s3"],
long_bam = config["long_read_bam"]["l1"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S1L2': # Illumina Paired End & PacBio CCS
short_bam = config["short_read_bam"]["s1"],
long_bam = config["long_read_bam"]["l2"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S2L2': # Illumina Mate Pair & PacBio CCS
short_bam = config["short_read_bam"]["s2"],
long_bam = config["long_read_bam"]["l2"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S3L2': # Illumina HiSeq & PacBio CCS
short_bam = config["short_read_bam"]["s3"],
long_bam = config["long_read_bam"]["l2"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S1L3': # Illumina Paired End & 10X Genomics
short_bam = config["short_read_bam"]["s1"],
long_bam = config["long_read_bam"]["l3"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S2L3': # Illumina Mate Pair & 10X Genomics
short_bam = config["short_read_bam"]["s2"],
long_bam = config["long_read_bam"]["l3"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'S3L3': # Illumina HiSeq & 10X Genomics
short_bam = config["short_read_bam"]["s3"],
long_bam = config["long_read_bam"]["l3"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'L1': # MtSinai PacBio
long_bam = config["long_read_bam"]["l1"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'L2': # PacBio CCS
long_bam = config["long_read_bam"]["l2"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'L3': # 10X Genomics
long_bam = config["long_read_bam"]["l3"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_long_reads {long_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'hg38_Sim_default': # Illumina
short_bam = config["simulated_short_read_bam"]["sim1"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'hg38_Sim_InDel': # Illumina
short_bam = config["simulated_short_read_bam"]["sim2"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
elif wildcards.input_combination == 'hg38_Sim_noSNP': # Illumina
short_bam = config["simulated_short_read_bam"]["sim3"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
else: # wildcards.input_combination == 'hg38_Sim_SNPandSV': # Illumina
short_bam = config["simulated_short_read_bam"]["sim4"]
shell("""
/usr/bin/time -v ./build/iGenVar/bin/iGenVar --input_short_reads {short_bam} \
--output {output.vcf} --vcf_sample_name {sample} --threads {threads} \
--output {output.vcf} --vcf_sample_name {sample_HG002} --threads {threads} \
--min_var_length {min_var_length} --max_var_length {max_var_length} --min_qual 1 &>> {log}
""")
# Defaults:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ rule truvari_DEL:
"logs/truvari/truvari_output.{input_combination}.{min_qual}.log"
params:
output_dir = "results/caller_comparison_iGenVar_only/eval/{input_combination}/DEL.min_qual_{min_qual}",
truth_set_gz = config["truth_set_DEL_renamed_chr"]["gz"],
truth_set_bed = config["truth_set_renamed_chr"]["bed"]
truth_set_gz = config["truth_set_HG002_DEL_renamed_chr"]["gz"],
truth_set_bed = config["truth_set_HG002_renamed_chr"]["bed"]
shell:
"""
rm -rf {params.output_dir} && truvari bench -b {params.truth_set_gz} -c {input.vcf} -o {params.output_dir} -p 0 \
Expand All @@ -58,6 +58,10 @@ rule cat_truvari_results_DEL:
min_qual = min_qual_iGenVar),
S2 = expand("results/caller_comparison_iGenVar_only/eval/S2/DEL.min_qual_{min_qual}/pr_rec.txt",
min_qual = min_qual_iGenVar),
S3 = expand("results/caller_comparison_iGenVar_only/eval/S3/min_qual_{min_qual}/pr_rec.txt",
min_qual = min_qual_iGenVar),
# S4 = expand("results/caller_comparison_iGenVar_only/eval/S4/min_qual_{min_qual}/pr_rec.txt",
# min_qual = min_qual_iGenVar),
L1 = expand("results/caller_comparison_iGenVar_only/eval/L1/DEL.min_qual_{min_qual}/pr_rec.txt",
min_qual = min_qual_iGenVar),
L2 = expand("results/caller_comparison_iGenVar_only/eval/L2/DEL.min_qual_{min_qual}/pr_rec.txt",
Expand All @@ -67,6 +71,8 @@ rule cat_truvari_results_DEL:
output:
S1 = temp("results/caller_comparison_iGenVar_only/eval/S1.DEL_results.txt"),
S2 = temp("results/caller_comparison_iGenVar_only/eval/S2.DEL_results.txt"),
S3 = temp("results/caller_comparison_iGenVar_only/eval/S3.all_results.txt"),
# S4 = temp("results/caller_comparison_iGenVar_only/eval/S4.all_results.txt"),
L1 = temp("results/caller_comparison_iGenVar_only/eval/L1.DEL_results.txt"),
L2 = temp("results/caller_comparison_iGenVar_only/eval/L2.DEL_results.txt"),
L3 = temp("results/caller_comparison_iGenVar_only/eval/L3.DEL_results.txt"),
Expand All @@ -75,10 +81,16 @@ rule cat_truvari_results_DEL:
run:
shell("cat {input.S1} > {output.S1}")
shell("cat {input.S2} > {output.S2}")
shell("cat {input.S3} > {output.S3}")
# shell("cat {input.S4} > {output.S4}")
shell("cat {input.L1} > {output.L1}")
shell("cat {input.L2} > {output.L2}")
shell("cat {input.L3} > {output.L3}")
shell("""
cat {output.S1} {output.S2} \
cat {output.S1} {output.S2} {output.S3} \
{output.L1} {output.L2} {output.L3} > {output.all}
""")
# shell("""
# cat {output.S1} {output.S2} {output.S3} {output.S4} \
# {output.L1} {output.L2} {output.L3} > {output.all}
# """)
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ rule truvari_INS:
"logs/truvari/truvari_output.{input_combination}.{min_qual}.log"
params:
output_dir = "results/caller_comparison_iGenVar_only/eval/{input_combination}/INS.min_qual_{min_qual}",
truth_set_gz = config["truth_set_INS_renamed_chr"]["gz"],
truth_set_bed = config["truth_set_renamed_chr"]["bed"]
truth_set_gz = config["truth_set_HG002_INS_renamed_chr"]["gz"],
truth_set_bed = config["truth_set_HG002_renamed_chr"]["bed"]
shell:
"""
rm -rf {params.output_dir} && truvari bench -b {params.truth_set_gz} -c {input.vcf} -o {params.output_dir} -p 0 \
Expand All @@ -58,6 +58,10 @@ rule cat_truvari_results_INS:
min_qual = min_qual_iGenVar),
S2 = expand("results/caller_comparison_iGenVar_only/eval/S2/INS.min_qual_{min_qual}/pr_rec.txt",
min_qual = min_qual_iGenVar),
S3 = expand("results/caller_comparison_iGenVar_only/eval/S3/min_qual_{min_qual}/pr_rec.txt",
min_qual = min_qual_iGenVar),
# S4 = expand("results/caller_comparison_iGenVar_only/eval/S4/min_qual_{min_qual}/pr_rec.txt",
# min_qual = min_qual_iGenVar),
L1 = expand("results/caller_comparison_iGenVar_only/eval/L1/INS.min_qual_{min_qual}/pr_rec.txt",
min_qual = min_qual_iGenVar),
L2 = expand("results/caller_comparison_iGenVar_only/eval/L2/INS.min_qual_{min_qual}/pr_rec.txt",
Expand All @@ -67,6 +71,8 @@ rule cat_truvari_results_INS:
output:
S1 = temp("results/caller_comparison_iGenVar_only/eval/S1.INS_results.txt"),
S2 = temp("results/caller_comparison_iGenVar_only/eval/S2.INS_results.txt"),
S3 = temp("results/caller_comparison_iGenVar_only/eval/S3.all_results.txt"),
# S4 = temp("results/caller_comparison_iGenVar_only/eval/S4.all_results.txt"),
L1 = temp("results/caller_comparison_iGenVar_only/eval/L1.INS_results.txt"),
L2 = temp("results/caller_comparison_iGenVar_only/eval/L2.INS_results.txt"),
L3 = temp("results/caller_comparison_iGenVar_only/eval/L3.INS_results.txt"),
Expand All @@ -75,10 +81,16 @@ rule cat_truvari_results_INS:
run:
shell("cat {input.S1} > {output.S1}")
shell("cat {input.S2} > {output.S2}")
shell("cat {input.S3} > {output.S3}")
# shell("cat {input.S4} > {output.S4}")
shell("cat {input.L1} > {output.L1}")
shell("cat {input.L2} > {output.L2}")
shell("cat {input.L3} > {output.L3}")
shell("""
cat {output.S1} {output.S2} \
cat {output.S1} {output.S2} {output.S3} \
{output.L1} {output.L2} {output.L3} > {output.all}
""")
# shell("""
# cat {output.S1} {output.S2} {output.S3} {output.S4} \
# {output.L1} {output.L2} {output.L3} > {output.all}
# """)
Loading