From 087bf51450caeeb70b9b9a7b3b303a41e0377190 Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Thu, 4 Dec 2025 16:10:46 -0500 Subject: [PATCH 1/9] Initial commit of Ref Based fast mutation code --- build.gradle.kts | 5 + .../commands/MutateAssemblies.kt | 221 ++++++++++++++++++ .../commands/MutateAssembliesTest.kt | 60 +++++ 3 files changed, 286 insertions(+) create mode 100644 src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt create mode 100644 src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt diff --git a/build.gradle.kts b/build.gradle.kts index c8c38c4..1687796 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -15,6 +15,11 @@ dependencies { implementation("org.apache.logging.log4j:log4j-api:2.24.3") implementation("org.apache.logging.log4j:log4j-core:2.24.3") implementation("org.yaml:snakeyaml:2.3") + implementation("org.biokotlin:biokotlin:1.0.0") + implementation("com.google.guava:guava:33.1.0-jre") + implementation("com.github.samtools:htsjdk:4.0.1") + + testImplementation(kotlin("test")) } diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt new file mode 100644 index 0000000..f1c36a7 --- /dev/null +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -0,0 +1,221 @@ +package net.maizegenetics.net.maizegenetics.commands + +import apple.laf.JRSUIConstants +import biokotlin.seqIO.NucSeqIO +import com.github.ajalt.clikt.core.CliktCommand +import com.github.ajalt.clikt.parameters.options.option +import com.github.ajalt.clikt.parameters.options.required +import com.github.ajalt.clikt.parameters.types.path +import com.google.common.collect.Range +import com.google.common.collect.RangeMap +import com.google.common.collect.TreeRangeMap +import htsjdk.variant.vcf.VCFFileReader +import java.io.File +import kotlin.io.path.createDirectories +import kotlin.io.path.exists + + +data class Position(val contig: String, val position: Int) : Comparable { + override fun compareTo(other: Position): Int { + + if (this.contig == other.contig) { + return this.position.compareTo(other.position) + } + + val thisContig = this.contig.replace("chr", "", ignoreCase = true).trim() + val otherContig = other.contig.replace("chr", "", ignoreCase = true).trim() + + return try { + thisContig.toInt() - otherContig.toInt() + } catch (e: NumberFormatException) { + // If we can't convert contigs to an int, then compare the strings + contig.compareTo(other.contig) + } + + } + + override fun toString(): String { + return "$contig:$position" + } +} +data class SimpleVariant(val refStart: Position, val refEnd: Position, + val refAllele: String, val altAllele: String, + val isAddedMutation: Boolean = false) + +@OptIn(kotlin.io.path.ExperimentalPathApi::class) +class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { + + + private val referenceFasta by option( + help = "Reference fasta file" + ).path(mustExist = true, canBeFile = true, canBeDir = false) + .required() + + private val founderGvcf by option( + help = "Founder GVCF to mutate (.gvcf or .g.vcf.gz)" + ).path(mustExist = true, canBeFile = true, canBeDir = false) + .required() + + private val nonFounderGvcf by option( + help = "Non-founder GVCF to pull variants from (.gvcf or .g.vcf.gz)" + ).path(mustExist = true, canBeFile = true, canBeDir = false) + .required() + + + + private val outputDir by option(help = "Output dir") + .path(canBeFile = false, canBeDir = true) + .required() + + override fun run() { + if(!outputDir.exists()) { + outputDir.createDirectories() + } + + introduceMutations(referenceFasta.toFile(), founderGvcf.toFile(), nonFounderGvcf.toFile(), outputDir.toFile()) + + } + + fun introduceMutations(referenceFasta: File, founderGvcf: File, mutationGvcf: File, outputDir: File) { + val refSeq = NucSeqIO(referenceFasta.toString()).readAll() + + //walk through the two gvcf files + //From one pull the mutations left + + //Loop through the founderGVCF and build a RangeMap of Position to Variant + val founderVariantMap = buildFounderVariantMap(founderGvcf) + + addNewVariants(mutationGvcf, founderVariantMap) + } + + fun buildFounderVariantMap(founderGvcf: File) : RangeMap { + //Loop through the founderGVCF and build a RangeMap of Position to Variant + val variantReader = VCFFileReader(founderGvcf) + + val iterator = variantReader.iterator() + + val rangeMap = TreeRangeMap.create() + + while(iterator.hasNext()) { + val vc = iterator.next() + + val refChr = vc.contig + val refStart = vc.start + val refEnd = vc.end + + val refAllele = vc.reference.displayString + val altAlleles = vc.alternateAlleles.map { it.displayString } + + rangeMap.put(Range.closed(Position(refChr, refStart), Position(refChr, refEnd)), + SimpleVariant(Position(refChr, refStart), Position(refChr, refEnd), refAllele, altAlleles.joinToString(","))) + + } + + return rangeMap + } + + fun addNewVariants(mutationGvcf: File, founderVariantMap: RangeMap) { + val variantReader = VCFFileReader(mutationGvcf) + + val iterator = variantReader.iterator() + + while(iterator.hasNext()) { + val vc = iterator.next() + + val refChr = vc.contig + val refStart = vc.start + val refEnd = vc.end + + val refAllele = vc.reference.displayString + val altAllele = vc.alternateAlleles.map { it.displayString }.first() + + val variantPosition = Position(refChr, refStart) + + val currentSimpleVariant = SimpleVariant(Position(refChr, refStart), Position(refChr, refEnd), refAllele, altAllele, true) + + val overlappingVariantSt = founderVariantMap.get(variantPosition) + val overlappingVariantEnd = founderVariantMap.get(Position(refChr, refEnd)) + + if(overlappingVariantSt == overlappingVariantEnd) { + //skip as it will be tricky/slow to handle + continue //TODO figure out a way to handle this well + } + + if(overlappingVariantSt == null) { + //This is a new variant we can add as it does not overlap with an existing variant + founderVariantMap.put(Range.closed(Position(refChr, refStart), Position(refChr, refEnd)), currentSimpleVariant) + } + else { + //we need to split out the existing and add the new variant + updateOverlappingVariant(founderVariantMap, currentSimpleVariant) + } + + } + } + + fun updateOverlappingVariant(founderVariantMap: RangeMap, variant: SimpleVariant) { + //Get out the overlapping entry + val overlappingEntry = founderVariantMap.getEntry(variant.refStart)?: return + + //split it up based on the new variant + val existingVariant = overlappingEntry.value + + if(existingVariant == variant) { + //same variant, nothing to do + return + } + + //Check to see if the existing Variant is a SNP + if(existingVariant.refStart == existingVariant.refEnd) { + //SNP case, we can just replace it + founderVariantMap.put(Range.closed(variant.refStart, variant.refEnd), variant) + } + else if(existingVariant.refEnd.position > existingVariant.refStart.position && variant.refAllele.length == 1) { + //This is a refBlock case that fully covers the new variant + val splitVariants = splitRefBlock(existingVariant, variant) + + founderVariantMap.remove(overlappingEntry.key) + for(sv in splitVariants) { + founderVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) + } + } + + + } + + fun splitRefBlock(variantToSplit: SimpleVariant, variantToAdd: SimpleVariant): List { + //check to make sure that the variantToAdd is fully contained within the variantToSplit + require(variantToAdd.refStart >= variantToSplit.refStart && variantToAdd.refEnd <= variantToSplit.refEnd) { + "Variant to add must be fully contained within the variant to split" + } + + val splitVariants = mutableListOf() + //Check to see if the variantToAdd starts at the same position. + if(variantToSplit.refStart != variantToAdd.refStart) { + //We have a left side to create + val leftVariant = SimpleVariant( + refStart = variantToSplit.refStart, + refEnd = Position(variantToAdd.refStart.contig, variantToAdd.refStart.position -1), + refAllele = variantToSplit.refAllele, + altAllele = "", + isAddedMutation = false + ) + splitVariants.add(leftVariant) + } + //Add the new variant + splitVariants.add(variantToAdd) + //Check to see if we have a right side to create + if(variantToSplit.refEnd != variantToAdd.refEnd) { + val rightVariant = SimpleVariant( + refStart = Position(variantToAdd.refEnd.contig, variantToAdd.refEnd.position +1), + refEnd = variantToSplit.refEnd, + refAllele = variantToSplit.refAllele, + altAllele = "", + isAddedMutation = false + ) + splitVariants.add(rightVariant) + } + return splitVariants + } + +} \ No newline at end of file diff --git a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt new file mode 100644 index 0000000..f3a24aa --- /dev/null +++ b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt @@ -0,0 +1,60 @@ +package net.maizegenetics.commands + +import net.maizegenetics.net.maizegenetics.commands.MutateAssemblies +import net.maizegenetics.net.maizegenetics.commands.Position +import net.maizegenetics.net.maizegenetics.commands.SimpleVariant +import kotlin.test.Test +import kotlin.test.assertFailsWith + +class MutateAssembliesTest { + + @Test + fun splitRefBlockTest() { + val mutateAssemblies = MutateAssemblies() + + //Test non overlapping variants + val variantToSplit = SimpleVariant(Position("chr1", 100), Position("chr1", 200), "A", "") + + val nonOverlappingVariant = SimpleVariant(Position("chr1", 250), Position("chr1", 250), "C", "G") + + assertFailsWith { + mutateAssemblies.splitRefBlock(variantToSplit, nonOverlappingVariant) + } + + //check partially overlapping variants + val partialOverlapLeft = SimpleVariant(Position("chr1", 50), Position("chr1", 150), "T", "G") + assertFailsWith { + mutateAssemblies.splitRefBlock(variantToSplit, partialOverlapLeft) + } + + val partialOverlapRight = SimpleVariant(Position("chr1", 150), Position("chr1", 250), "T", "G") + assertFailsWith { + mutateAssemblies.splitRefBlock(variantToSplit, partialOverlapRight) + } + + val variantToAdd = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "T", "G") + val splitVariants = mutateAssemblies.splitRefBlock(variantToSplit, variantToAdd) + assert(splitVariants.size == 3) + assert(splitVariants[0].refStart == Position("chr1", 100)) + assert(splitVariants[0].refEnd == Position("chr1", 149)) + assert(splitVariants[1] == variantToAdd) + assert(splitVariants[2].refStart == Position("chr1", 151)) + assert(splitVariants[2].refEnd == Position("chr1", 200)) + + //check boundaries + val leftBoundaryVariant = SimpleVariant(Position("chr1", 100), Position("chr1", 100), "A", "G") + val splitVariantsLeft = mutateAssemblies.splitRefBlock(variantToSplit, leftBoundaryVariant) + assert(splitVariantsLeft.size == 2) + assert(splitVariantsLeft[0] == leftBoundaryVariant) + assert(splitVariantsLeft[1].refStart == Position("chr1", 101)) + assert(splitVariantsLeft[1].refEnd == Position("chr1", 200)) + + val rightBoundaryVariant = SimpleVariant(Position("chr1", 200), Position("chr1", 200), "A", "G") + val splitVariantsRight = mutateAssemblies.splitRefBlock(variantToSplit, rightBoundaryVariant) + assert(splitVariantsRight.size == 2) + assert(splitVariantsRight[0].refStart == Position("chr1", 100)) + assert(splitVariantsRight[0].refEnd == Position("chr1", 199)) + assert(splitVariantsRight[1] == rightBoundaryVariant) + + } +} \ No newline at end of file From 9f7e52610f224285ceaa561cf78a5c528ab2c240 Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Fri, 5 Dec 2025 14:54:30 -0500 Subject: [PATCH 2/9] Add more unit tests --- .../commands/MutateAssemblies.kt | 16 ++--- .../commands/MutateAssembliesTest.kt | 70 +++++++++++++++++++ 2 files changed, 75 insertions(+), 11 deletions(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt index f1c36a7..88c79ab 100644 --- a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -44,12 +44,7 @@ data class SimpleVariant(val refStart: Position, val refEnd: Position, @OptIn(kotlin.io.path.ExperimentalPathApi::class) class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { - - - private val referenceFasta by option( - help = "Reference fasta file" - ).path(mustExist = true, canBeFile = true, canBeDir = false) - .required() + private val founderGvcf by option( help = "Founder GVCF to mutate (.gvcf or .g.vcf.gz)" @@ -72,13 +67,11 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { outputDir.createDirectories() } - introduceMutations(referenceFasta.toFile(), founderGvcf.toFile(), nonFounderGvcf.toFile(), outputDir.toFile()) + introduceMutations(founderGvcf.toFile(), nonFounderGvcf.toFile(), outputDir.toFile()) } - fun introduceMutations(referenceFasta: File, founderGvcf: File, mutationGvcf: File, outputDir: File) { - val refSeq = NucSeqIO(referenceFasta.toString()).readAll() - + fun introduceMutations(founderGvcf: File, mutationGvcf: File, outputDir: File) { //walk through the two gvcf files //From one pull the mutations left @@ -170,7 +163,8 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { //SNP case, we can just replace it founderVariantMap.put(Range.closed(variant.refStart, variant.refEnd), variant) } - else if(existingVariant.refEnd.position > existingVariant.refStart.position && variant.refAllele.length == 1) { + else if( existingVariant.refAllele.length == 1 && existingVariant.altAllele == "" //This checks that the existing variant is a refBlock + && variant.refAllele.length == 1 && variant.altAllele.length == 1 ) { //This means current variant is a SNP //This is a refBlock case that fully covers the new variant val splitVariants = splitRefBlock(existingVariant, variant) diff --git a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt index f3a24aa..a399391 100644 --- a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt +++ b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt @@ -1,9 +1,14 @@ package net.maizegenetics.commands +import apple.laf.JRSUIConstants +import com.google.common.collect.Range +import com.google.common.collect.RangeMap +import com.google.common.collect.TreeRangeMap import net.maizegenetics.net.maizegenetics.commands.MutateAssemblies import net.maizegenetics.net.maizegenetics.commands.Position import net.maizegenetics.net.maizegenetics.commands.SimpleVariant import kotlin.test.Test +import kotlin.test.assertEquals import kotlin.test.assertFailsWith class MutateAssembliesTest { @@ -57,4 +62,69 @@ class MutateAssembliesTest { assert(splitVariantsRight[1] == rightBoundaryVariant) } + + @Test + fun updateOverlappingVariantTest() { + //Need to test to make sure that the removing and adding of variants is working as expected + val mutateAssemblies = MutateAssemblies() + + //need a range map to work with + val founderVariantMap = createSimpleFounderMap() + + val initialSize = founderVariantMap.asMapOfRanges().size + + //check a variant outside of the existing ranges + val nonOverlappingVariant = SimpleVariant(Position("chr1", 500), Position("chr1", 500), "C", "G") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, nonOverlappingVariant) + + assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + + //Try other chromosomes + val nonOverlappingVariantOtherChr = SimpleVariant(Position("chr2", 150), Position("chr2", 150), "C", "G") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, nonOverlappingVariantOtherChr) + assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + + + //Update the SNP at position 150 with the same variant + val sameSNPVariant = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "G") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, sameSNPVariant) + assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + + //Update the SNP at position 150 + val overlappingSNPVariant = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "T") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, overlappingSNPVariant) + assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + val retrievedVariant = founderVariantMap.get(Position("chr1", 150)) + assertEquals(overlappingSNPVariant, retrievedVariant) + + + //add a SNP at position 175 + val newSNPVariant = SimpleVariant(Position("chr1", 175), Position("chr1", 175), "A", "T") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, newSNPVariant) + assertEquals(initialSize + 2, founderVariantMap.asMapOfRanges().size) + val retrievedNewVariant = founderVariantMap.get(Position("chr1", 175)) + assertEquals(newSNPVariant, retrievedNewVariant) + //Check to see if the surrounding refBlocks were updated correctly + val leftRefBlock = founderVariantMap.get(Position("chr1", 174)) + assertEquals(SimpleVariant(Position("chr1", 151), Position("chr1", 174), "T", ""), leftRefBlock) + val rightRefBlock = founderVariantMap.get(Position("chr1", 176)) + assertEquals(SimpleVariant(Position("chr1", 176), Position("chr1", 200), "T", ""), rightRefBlock) + + + } + + private fun createSimpleFounderMap(): RangeMap { + val rangeMap = TreeRangeMap.create() + + rangeMap.put(Range.closed(Position("chr1", 100), Position("chr1", 150)), SimpleVariant(Position("chr1", 100), Position("chr1", 150), "A", "")) + rangeMap.put(Range.closed(Position("chr1", 150), Position("chr1", 150)), SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "G")) + rangeMap.put(Range.closed(Position("chr1", 151), Position("chr1", 200)), SimpleVariant(Position("chr1", 151), Position("chr1", 200), "T", "")) + rangeMap.put(Range.closed(Position("chr1", 201), Position("chr1", 205)), SimpleVariant(Position("chr1", 201), Position("chr1", 205), "GGGGG", "G")) + rangeMap.put(Range.closed(Position("chr1", 206), Position("chr1", 300)), SimpleVariant(Position("chr1", 206), Position("chr1", 300), "A", "")) + rangeMap.put(Range.closed(Position("chr1", 301), Position("chr1", 301)), SimpleVariant(Position("chr1", 301), Position("chr1", 301), "G", "C")) + rangeMap.put(Range.closed(Position("chr1", 302), Position("chr1", 400)), SimpleVariant(Position("chr1", 302), Position("chr1", 400), "T", "")) + + + return rangeMap + } } \ No newline at end of file From 202f9712c214ffc5f625a4fb38931d56cfaee3b4 Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Fri, 5 Dec 2025 16:59:37 -0500 Subject: [PATCH 3/9] Additional updates for mutation code --- .../net/maizegenetics/commands/MutateAssemblies.kt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt index 88c79ab..7e89f33 100644 --- a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -44,7 +44,7 @@ data class SimpleVariant(val refStart: Position, val refEnd: Position, @OptIn(kotlin.io.path.ExperimentalPathApi::class) class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { - + private val founderGvcf by option( help = "Founder GVCF to mutate (.gvcf or .g.vcf.gz)" @@ -173,6 +173,18 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { founderVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) } } + //We also need to handle a complex edge case where we have an indel overlapping another indel. + // If the new indel is fully covered we can remove the existing, add potentially 2 refBlocks surrounding + //TODO check this logic + else if( variant.refStart >= existingVariant.refStart && variant.refEnd <= existingVariant.refEnd ) { + //The new variant is fully contained within the existing variant + val splitVariants = splitRefBlock(existingVariant, variant) + + founderVariantMap.remove(overlappingEntry.key) + for(sv in splitVariants) { + founderVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) + } + } } From 82a60add51e1438f012075774f316289296c002c Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Tue, 9 Dec 2025 16:11:27 -0500 Subject: [PATCH 4/9] Initial gvcf output --- .../commands/MutateAssemblies.kt | 115 ++++++++++++++++-- .../commands/MutateAssembliesTest.kt | 53 +++++++- 2 files changed, 153 insertions(+), 15 deletions(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt index 7e89f33..10b471d 100644 --- a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -9,8 +9,21 @@ import com.github.ajalt.clikt.parameters.types.path import com.google.common.collect.Range import com.google.common.collect.RangeMap import com.google.common.collect.TreeRangeMap +import htsjdk.variant.variantcontext.Allele +import htsjdk.variant.variantcontext.GenotypeBuilder +import htsjdk.variant.variantcontext.VariantContextBuilder +import htsjdk.variant.variantcontext.writer.Options +import htsjdk.variant.variantcontext.writer.VariantContextWriter +import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder import htsjdk.variant.vcf.VCFFileReader +import htsjdk.variant.vcf.VCFFormatHeaderLine +import htsjdk.variant.vcf.VCFHeader +import htsjdk.variant.vcf.VCFHeaderLine +import htsjdk.variant.vcf.VCFHeaderLineCount +import htsjdk.variant.vcf.VCFHeaderLineType +import htsjdk.variant.vcf.VCFInfoHeaderLine import java.io.File +import java.util.HashSet import kotlin.io.path.createDirectories import kotlin.io.path.exists @@ -76,17 +89,20 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { //From one pull the mutations left //Loop through the founderGVCF and build a RangeMap of Position to Variant - val founderVariantMap = buildFounderVariantMap(founderGvcf) + val (sampleName, founderVariantMap) = buildFounderVariantMap(founderGvcf) addNewVariants(mutationGvcf, founderVariantMap) + + writeMutatedGVCF(outputDir, sampleName, founderVariantMap) } - fun buildFounderVariantMap(founderGvcf: File) : RangeMap { + fun buildFounderVariantMap(founderGvcf: File) : Pair> { //Loop through the founderGVCF and build a RangeMap of Position to Variant val variantReader = VCFFileReader(founderGvcf) val iterator = variantReader.iterator() + val sampleName = variantReader.header.sampleNamesInOrder.first() val rangeMap = TreeRangeMap.create() while(iterator.hasNext()) { @@ -104,7 +120,7 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { } - return rangeMap + return Pair(sampleName,rangeMap) } fun addNewVariants(mutationGvcf: File, founderVariantMap: RangeMap) { @@ -175,16 +191,16 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { } //We also need to handle a complex edge case where we have an indel overlapping another indel. // If the new indel is fully covered we can remove the existing, add potentially 2 refBlocks surrounding - //TODO check this logic - else if( variant.refStart >= existingVariant.refStart && variant.refEnd <= existingVariant.refEnd ) { - //The new variant is fully contained within the existing variant - val splitVariants = splitRefBlock(existingVariant, variant) - - founderVariantMap.remove(overlappingEntry.key) - for(sv in splitVariants) { - founderVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) - } - } + //TODO make this work with indel edge cases +// else if( variant.refStart >= existingVariant.refStart && variant.refEnd <= existingVariant.refEnd ) { +// //The new variant is fully contained within the existing variant +// val splitVariants = splitRefBlock(existingVariant, variant) +// +// founderVariantMap.remove(overlappingEntry.key) +// for(sv in splitVariants) { +// founderVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) +// } +// } } @@ -224,4 +240,77 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { return splitVariants } + fun writeMutatedGVCF(outputDir: File, sampleName: String, founderVariantMap: RangeMap) { + val outputGvcfFile = File(outputDir, "${sampleName}_mutated.g.vcf") + VariantContextWriterBuilder() + .unsetOption(Options.INDEX_ON_THE_FLY) + .setOutputFile(outputGvcfFile) + .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF) + .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER) + .build().use { writer -> + + writer.writeHeader(createGenericHeader(listOf(sampleName), emptySet())) + + val sortedVariants = founderVariantMap.asMapOfRanges().toList().sortedBy { it.first.lowerEndpoint() } + + for ((range, variant) in sortedVariants) { + val vcBuilder = VariantContextBuilder() + vcBuilder.chr(variant.refStart.contig) + vcBuilder.start(variant.refStart.position.toLong()) + vcBuilder.stop(variant.refEnd.position.toLong()) + vcBuilder.id(".") + vcBuilder.alleles( + listOf( + Allele.create(variant.refAllele, true), + Allele.create(variant.altAllele, false) + ) + ) + vcBuilder.attribute("END", variant.refEnd.position) + val genotypeBuilder = GenotypeBuilder(sampleName) + val alleles = if (variant.altAllele == "") { + listOf(Allele.create(variant.refAllele, true), Allele.create(variant.refAllele, true)) + } else { + listOf(Allele.create(variant.altAllele, false), Allele.create(variant.altAllele, false)) + } + genotypeBuilder.alleles( + alleles + ) + vcBuilder.genotypes(genotypeBuilder.make()) + + writer.add(vcBuilder.make()) + } + + } + } + + fun createGenericHeader(taxaNames: List, altLines:Set): VCFHeader { + val headerLines = createGenericHeaderLineSet() as MutableSet + headerLines.addAll(altLines) + return VCFHeader(headerLines, taxaNames) + } + + fun createGenericHeaderLineSet(): Set { + val headerLines: MutableSet = HashSet() + headerLines.add(VCFFormatHeaderLine("AD", 3, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")) + headerLines.add( + VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)") + ) + headerLines.add(VCFFormatHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality")) + headerLines.add(VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype")) + headerLines.add( + VCFFormatHeaderLine("PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification") + ) + headerLines.add(VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Total Depth")) + headerLines.add(VCFInfoHeaderLine("NS", 1, VCFHeaderLineType.Integer, "Number of Samples With Data")) + headerLines.add(VCFInfoHeaderLine("AF", 3, VCFHeaderLineType.Integer, "Allele Frequency")) + headerLines.add(VCFInfoHeaderLine("END", 1, VCFHeaderLineType.Integer, "Stop position of the interval")) + // These last 4 are needed for assembly g/hvcfs, but not for reference. I am keeping them in as header + // lines but they will only be added to the data lines if they are present in the VariantContext. + headerLines.add(VCFInfoHeaderLine("ASM_Chr", 1, VCFHeaderLineType.String, "Assembly chromosome")) + headerLines.add(VCFInfoHeaderLine("ASM_Start", 1, VCFHeaderLineType.Integer, "Assembly start position")) + headerLines.add(VCFInfoHeaderLine("ASM_End", 1, VCFHeaderLineType.Integer, "Assembly end position")) + headerLines.add(VCFInfoHeaderLine("ASM_Strand", 1, VCFHeaderLineType.String, "Assembly strand")) + return headerLines + } + } \ No newline at end of file diff --git a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt index a399391..b4d00d7 100644 --- a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt +++ b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt @@ -1,12 +1,13 @@ package net.maizegenetics.commands -import apple.laf.JRSUIConstants import com.google.common.collect.Range import com.google.common.collect.RangeMap import com.google.common.collect.TreeRangeMap +import htsjdk.variant.vcf.VCFFileReader import net.maizegenetics.net.maizegenetics.commands.MutateAssemblies import net.maizegenetics.net.maizegenetics.commands.Position import net.maizegenetics.net.maizegenetics.commands.SimpleVariant +import java.io.File import kotlin.test.Test import kotlin.test.assertEquals import kotlin.test.assertFailsWith @@ -111,8 +112,55 @@ class MutateAssembliesTest { assertEquals(SimpleVariant(Position("chr1", 176), Position("chr1", 200), "T", ""), rightRefBlock) +// //add an indel that is fully contained within an existing indel +// val newIndelVariant = SimpleVariant(Position("chr1", 405), Position("chr1", 407), "GGG", "G") +// mutateAssemblies.updateOverlappingVariant(founderVariantMap, newIndelVariant) +// assertEquals(initialSize, founderVariantMap.asMapOfRanges().size)//Should stay the same size +// //The first ref block should be extended +// val firstRefBlock = founderVariantMap.get(Position("chr1", 401)) +// assertEquals(SimpleVariant(Position("chr1", 302), Position("chr1", 404), "T", ""), firstRefBlock) +// //The new indel should be present +// val retrievedIndel = founderVariantMap.get(Position("chr1", 405)) +// assertEquals(newIndelVariant, retrievedIndel) +// //The last ref block should be extended +// val lastRefBlock = founderVariantMap.get(Position("chr1", 408)) +// assertEquals(SimpleVariant(Position("chr1", 408), Position("chr1", 500), "G", ""), lastRefBlock) + + } + + @Test + fun testWriteMutatedGVCF() { + val mutateAssemblies = MutateAssemblies() + + val homeDir = System.getProperty("user.home").replace('\\', '/') + + val outputDir = "$homeDir/temp/seq_sim/mutated_gvcf_test/" + + File(outputDir).mkdirs() + + //need a range map to work with + val founderVariantMap = createSimpleFounderMap() + + mutateAssemblies.writeMutatedGVCF(File(outputDir), "testSample", founderVariantMap) + + //load in the GVCF and check to make sure the variants match + VCFFileReader(File("${outputDir}testSample_mutated.g.vcf"), false).use { vcfReader -> + val variants = vcfReader.iterator().asSequence().toList() + + //should be 7 variants + assertEquals(7, variants.size) + + //check a few variants + val variant150 = variants.find { it.contig == "chr1" && it.start == 150 } + assertEquals("G", variant150!!.genotypes.get(0).alleles[0].displayString) + + val variant301 = variants.find { it.contig == "chr1" && it.start == 301 } + assertEquals("C", variant301!!.genotypes.get(0).alleles[0].displayString) + } + } + private fun createSimpleFounderMap(): RangeMap { val rangeMap = TreeRangeMap.create() @@ -124,7 +172,8 @@ class MutateAssembliesTest { rangeMap.put(Range.closed(Position("chr1", 301), Position("chr1", 301)), SimpleVariant(Position("chr1", 301), Position("chr1", 301), "G", "C")) rangeMap.put(Range.closed(Position("chr1", 302), Position("chr1", 400)), SimpleVariant(Position("chr1", 302), Position("chr1", 400), "T", "")) - +// rangeMap.put(Range.closed(Position("chr1",401), Position("chr1",410)), SimpleVariant(Position("chr1",401), Position("chr1",410), "GGGGGGGGGG", "G")) +// rangeMap.put(Range.closed(Position("chr1",411), Position("chr1",500)), SimpleVariant(Position("chr1",411), Position("chr1",500), "C", "")) return rangeMap } } \ No newline at end of file From 651af144b8967f0d12036684dd8ae43d12a1ace6 Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Wed, 10 Dec 2025 09:51:14 -0500 Subject: [PATCH 5/9] Refactor to make some things easier to test --- .../commands/MutateAssemblies.kt | 53 +++++++++++-------- .../commands/MutateAssembliesTest.kt | 6 +++ 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt index 10b471d..98b2b28 100644 --- a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -11,6 +11,7 @@ import com.google.common.collect.RangeMap import com.google.common.collect.TreeRangeMap import htsjdk.variant.variantcontext.Allele import htsjdk.variant.variantcontext.GenotypeBuilder +import htsjdk.variant.variantcontext.VariantContext import htsjdk.variant.variantcontext.VariantContextBuilder import htsjdk.variant.variantcontext.writer.Options import htsjdk.variant.variantcontext.writer.VariantContextWriter @@ -131,34 +132,44 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { while(iterator.hasNext()) { val vc = iterator.next() - val refChr = vc.contig - val refStart = vc.start - val refEnd = vc.end + extractVCAndAddToRangeMap(vc, founderVariantMap) - val refAllele = vc.reference.displayString - val altAllele = vc.alternateAlleles.map { it.displayString }.first() + } + } - val variantPosition = Position(refChr, refStart) + fun extractVCAndAddToRangeMap( + vc: VariantContext, + founderVariantMap: RangeMap + ) { + val refChr = vc.contig + val refStart = vc.start + val refEnd = vc.end - val currentSimpleVariant = SimpleVariant(Position(refChr, refStart), Position(refChr, refEnd), refAllele, altAllele, true) + val refAllele = vc.reference.displayString + val altAllele = vc.alternateAlleles.map { it.displayString }.first() - val overlappingVariantSt = founderVariantMap.get(variantPosition) - val overlappingVariantEnd = founderVariantMap.get(Position(refChr, refEnd)) + val variantPosition = Position(refChr, refStart) - if(overlappingVariantSt == overlappingVariantEnd) { - //skip as it will be tricky/slow to handle - continue //TODO figure out a way to handle this well - } + val currentSimpleVariant = + SimpleVariant(Position(refChr, refStart), Position(refChr, refEnd), refAllele, altAllele, true) - if(overlappingVariantSt == null) { - //This is a new variant we can add as it does not overlap with an existing variant - founderVariantMap.put(Range.closed(Position(refChr, refStart), Position(refChr, refEnd)), currentSimpleVariant) - } - else { - //we need to split out the existing and add the new variant - updateOverlappingVariant(founderVariantMap, currentSimpleVariant) - } + val overlappingVariantSt = founderVariantMap.get(variantPosition) + val overlappingVariantEnd = founderVariantMap.get(Position(refChr, refEnd)) + if (overlappingVariantSt == overlappingVariantEnd) { + //skip as it will be tricky/slow to handle + return //TODO figure out a way to handle this well + } + + if (overlappingVariantSt == null) { + //This is a new variant we can add as it does not overlap with an existing variant + founderVariantMap.put( + Range.closed(Position(refChr, refStart), Position(refChr, refEnd)), + currentSimpleVariant + ) + } else { + //we need to split out the existing and add the new variant + updateOverlappingVariant(founderVariantMap, currentSimpleVariant) } } diff --git a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt index b4d00d7..2631b70 100644 --- a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt +++ b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt @@ -156,6 +156,12 @@ class MutateAssembliesTest { val variant301 = variants.find { it.contig == "chr1" && it.start == 301 } assertEquals("C", variant301!!.genotypes.get(0).alleles[0].displayString) + + //check one of the indels position 201 + val variant201 = variants.find { it.contig == "chr1" && it.start == 201 } + assertEquals("GGGGG", variant201!!.reference.baseString) + assertEquals("G", variant201!!.genotypes.get(0).alleles[0].displayString) + } } From a9b1feb7e6be7386ed2c90b941ce30005bbf18bc Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Wed, 10 Dec 2025 14:45:38 -0500 Subject: [PATCH 6/9] Adding more unit testing --- .../commands/MutateAssemblies.kt | 21 +++-- .../commands/MutateAssembliesTest.kt | 89 +++++++++++++++++++ 2 files changed, 103 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt index 98b2b28..11963c8 100644 --- a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -1,7 +1,5 @@ package net.maizegenetics.net.maizegenetics.commands -import apple.laf.JRSUIConstants -import biokotlin.seqIO.NucSeqIO import com.github.ajalt.clikt.core.CliktCommand import com.github.ajalt.clikt.parameters.options.option import com.github.ajalt.clikt.parameters.options.required @@ -14,7 +12,6 @@ import htsjdk.variant.variantcontext.GenotypeBuilder import htsjdk.variant.variantcontext.VariantContext import htsjdk.variant.variantcontext.VariantContextBuilder import htsjdk.variant.variantcontext.writer.Options -import htsjdk.variant.variantcontext.writer.VariantContextWriter import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder import htsjdk.variant.vcf.VCFFileReader import htsjdk.variant.vcf.VCFFormatHeaderLine @@ -153,15 +150,19 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { val currentSimpleVariant = SimpleVariant(Position(refChr, refStart), Position(refChr, refEnd), refAllele, altAllele, true) - val overlappingVariantSt = founderVariantMap.get(variantPosition) - val overlappingVariantEnd = founderVariantMap.get(Position(refChr, refEnd)) + val overlappingVariant = founderVariantMap.get(variantPosition) - if (overlappingVariantSt == overlappingVariantEnd) { + //Skip if refBlock + if(currentSimpleVariant.refAllele.length == 1 && currentSimpleVariant.altAllele == "") { + return + } + + if (isIndel(currentSimpleVariant) && isIndel(overlappingVariant)) { //skip as it will be tricky/slow to handle return //TODO figure out a way to handle this well } - if (overlappingVariantSt == null) { + if (overlappingVariant == null) { //This is a new variant we can add as it does not overlap with an existing variant founderVariantMap.put( Range.closed(Position(refChr, refStart), Position(refChr, refEnd)), @@ -324,4 +325,10 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { return headerLines } + fun isIndel(variant: SimpleVariant?): Boolean { + if(variant == null) return false + return !((variant.refAllele.length == 1 && variant.altAllele.length == 1) || //SNP + (variant.refAllele.length == 1 && variant.altAllele == "")) //RefBlock + } + } \ No newline at end of file diff --git a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt index 2631b70..63c28bd 100644 --- a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt +++ b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt @@ -3,6 +3,8 @@ package net.maizegenetics.commands import com.google.common.collect.Range import com.google.common.collect.RangeMap import com.google.common.collect.TreeRangeMap +import htsjdk.variant.variantcontext.Allele +import htsjdk.variant.variantcontext.VariantContextBuilder import htsjdk.variant.vcf.VCFFileReader import net.maizegenetics.net.maizegenetics.commands.MutateAssemblies import net.maizegenetics.net.maizegenetics.commands.Position @@ -167,6 +169,93 @@ class MutateAssembliesTest { } + @Test + fun testExtractVCAndAddToRangeMap() { + val mutateAssemblies = MutateAssemblies() + + //need a range map to work with + val founderVariantMap = createSimpleFounderMap() + + val testVariantContextSNPBuilder = VariantContextBuilder() + .chr("chr1") + .start(150) + .stop(150) + .alleles(listOf(Allele.create("C", true), Allele.create("A", false))).make() + + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNPBuilder, founderVariantMap) + + //Size should stay the same, the variant should be different + assertEquals(7, founderVariantMap.asMapOfRanges().size) + val variant150 = founderVariantMap.get(Position("chr1", 150)) + assertEquals("A", variant150!!.altAllele) + + //Split a ref block with a SNP + val testVariantContextSNP2Builder = VariantContextBuilder() + .chr("chr1") + .start(175) + .stop(175) + .alleles(listOf(Allele.create("A", true), Allele.create("T", false))).make() + + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNP2Builder, founderVariantMap) + //Size should increase by 2, 1 for new SNP and 1 for trailing ref block + assertEquals(9, founderVariantMap.asMapOfRanges().size) + val variant175 = founderVariantMap.get(Position("chr1", 175)) + assertEquals("T", variant175!!.altAllele) + + + //Introduce a RefBlock overlapping a SNP This should skip + val testVariantContextRefBlockBuilder = VariantContextBuilder() + .chr("chr1") + .start(150) + .stop(200) + .alleles(listOf(Allele.create("C", true), Allele.create("", false))).make() + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextRefBlockBuilder, founderVariantMap) + //Size should keep same size as we skip introducing a ref blocks + assertEquals(9, founderVariantMap.asMapOfRanges().size) + + //add in a completely new variant + val testVariantContextNewSNPBuilder = VariantContextBuilder() + .chr("chr1") + .start(500) + .stop(500) + .alleles(listOf(Allele.create("A", true), Allele.create("G", false))).make() + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextNewSNPBuilder, founderVariantMap) + //Size should increase by 2, 1 for new SNP and 1 for trailing ref block + assertEquals(10, founderVariantMap.asMapOfRanges().size) + val variant250 = founderVariantMap.get(Position("chr1", 500)) + assertEquals("G", variant250!!.altAllele) + + //Try to add an overlapping indel + val testVariantContextIndelBuilder = VariantContextBuilder() + .chr("chr1") + .start(201) + .stop(203) + .alleles(listOf(Allele.create("GGG", true), Allele.create("G", false))).make() + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextIndelBuilder, founderVariantMap) + //Size should stay the same + assertEquals(10, founderVariantMap.asMapOfRanges().size) + val variant201 = founderVariantMap.get(Position("chr1", 201)) + assertEquals("G", variant201!!.altAllele) + + } + + @Test + fun testIsIndel() { + val mutateAssemblies = MutateAssemblies() + + val refBlockVariant = SimpleVariant(Position("chr1", 100), Position("chr1", 150), "A", "") + assert(!mutateAssemblies.isIndel(refBlockVariant)) + + val snpVariant = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "G") + assert(!mutateAssemblies.isIndel(snpVariant)) + + val insertionVariant = SimpleVariant(Position("chr1", 200), Position("chr1", 200), "A", "AGG") + assert(mutateAssemblies.isIndel(insertionVariant)) + + val deletionVariant = SimpleVariant(Position("chr1", 201), Position("chr1", 205), "GGGGG", "G") + assert(mutateAssemblies.isIndel(deletionVariant)) + } + private fun createSimpleFounderMap(): RangeMap { val rangeMap = TreeRangeMap.create() From 84327091409075444222d9d01b073fe3e7e2763e Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Thu, 11 Dec 2025 15:52:42 -0500 Subject: [PATCH 7/9] Adding more unit tests --- .../commands/MutateAssemblies.kt | 7 +- .../commands/MutateAssembliesTest.kt | 68 ++++++++++++++++++- 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt index 11963c8..3ce4b50 100644 --- a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -96,7 +96,7 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { fun buildFounderVariantMap(founderGvcf: File) : Pair> { //Loop through the founderGVCF and build a RangeMap of Position to Variant - val variantReader = VCFFileReader(founderGvcf) + val variantReader = VCFFileReader(founderGvcf, false) val iterator = variantReader.iterator() @@ -122,7 +122,7 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { } fun addNewVariants(mutationGvcf: File, founderVariantMap: RangeMap) { - val variantReader = VCFFileReader(mutationGvcf) + val variantReader = VCFFileReader(mutationGvcf, false) val iterator = variantReader.iterator() @@ -130,7 +130,6 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { val vc = iterator.next() extractVCAndAddToRangeMap(vc, founderVariantMap) - } } @@ -192,7 +191,7 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { founderVariantMap.put(Range.closed(variant.refStart, variant.refEnd), variant) } else if( existingVariant.refAllele.length == 1 && existingVariant.altAllele == "" //This checks that the existing variant is a refBlock - && variant.refAllele.length == 1 && variant.altAllele.length == 1 ) { //This means current variant is a SNP + && variant.refAllele.length == 1 ) { //This means current variant is a SNP or single BP insertion which can be treated like a normal SNP as it only covers 1 ref BP //This is a refBlock case that fully covers the new variant val splitVariants = splitRefBlock(existingVariant, variant) diff --git a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt index 63c28bd..2bc7198 100644 --- a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt +++ b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt @@ -16,6 +16,56 @@ import kotlin.test.assertFailsWith class MutateAssembliesTest { + val homeDir = System.getProperty("user.home").replace('\\', '/') + + val outputDir = "$homeDir/temp/seq_sim/mutated_gvcf_test/" + + @Test + fun testBuildFounderVariantMap() { + val mutateAssemblies = MutateAssemblies() + + val founderVariantMap = createSimpleFounderMap() + + //load back in with + val loadedMap = mutateAssemblies.buildFounderVariantMap(File("data/MutateAssemblies/founder.g.vcf")) + + //Should match the original map + assertEquals(founderVariantMap.asMapOfRanges().size, loadedMap.second.asMapOfRanges().size) + for(entry in founderVariantMap.asMapOfRanges().entries) { + val loadedVariant = loadedMap.second.get(entry.key.lowerEndpoint()) + assertEquals(entry.value, loadedVariant) + } + } + + @Test + fun testAddNewVariants() { + val mutateAssemblies = MutateAssemblies() + + val (sampleName,founderVariantMap) = mutateAssemblies.buildFounderVariantMap(File("data/MutateAssemblies/founder.g.vcf")) + + mutateAssemblies.addNewVariants(File("data/MutateAssemblies/nonFounder.g.vcf"), founderVariantMap) + + //This should match the founderMap but we have new variants + assertEquals(11, founderVariantMap.asMapOfRanges().size) + + //Here are the three additions: + //chr1 125 . T A . . END=125 GT 1/1 + //chr1 175 . C TTTT . . END=175 GT 1/1 + //chr1 500 . A T . . END=500 GT 1/1 + val variant125 = founderVariantMap.get(Position("chr1", 125)) + assertEquals(SimpleVariant(Position("chr1", 125), Position("chr1", + 125), "T", "A",true), variant125) + + val variant175 = founderVariantMap.get(Position("chr1", 175)) + assertEquals(SimpleVariant(Position("chr1", 175), Position("chr1", 175), "C", "TTTT", true), variant175) + + val variant500 = founderVariantMap.get(Position("chr1", 500)) + assertEquals(SimpleVariant(Position("chr1", 500), Position("chr1", 500), "A", "T", true), variant500) + + + + } + @Test fun splitRefBlockTest() { val mutateAssemblies = MutateAssemblies() @@ -114,6 +164,7 @@ class MutateAssembliesTest { assertEquals(SimpleVariant(Position("chr1", 176), Position("chr1", 200), "T", ""), rightRefBlock) + //Leaving this in for now as I may want to re-implement this functionality later // //add an indel that is fully contained within an existing indel // val newIndelVariant = SimpleVariant(Position("chr1", 405), Position("chr1", 407), "GGG", "G") // mutateAssemblies.updateOverlappingVariant(founderVariantMap, newIndelVariant) @@ -134,9 +185,6 @@ class MutateAssembliesTest { fun testWriteMutatedGVCF() { val mutateAssemblies = MutateAssemblies() - val homeDir = System.getProperty("user.home").replace('\\', '/') - - val outputDir = "$homeDir/temp/seq_sim/mutated_gvcf_test/" File(outputDir).mkdirs() @@ -237,6 +285,20 @@ class MutateAssembliesTest { val variant201 = founderVariantMap.get(Position("chr1", 201)) assertEquals("G", variant201!!.altAllele) + //Try to add a snp at an indel... this should not add anything + val testVariantContextSNPAtIndelBuilder = VariantContextBuilder() + .chr("chr1") + .start(202) + .stop(202) + .alleles(listOf(Allele.create("G", true), Allele.create("A", false))).make() + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNPAtIndelBuilder, founderVariantMap) + //Size should stay the same + assertEquals(10, founderVariantMap.asMapOfRanges().size) + val variant202 = founderVariantMap.get(Position("chr1", 202)) + assertEquals("G", variant202!!.altAllele) + + + } @Test From a6a9997d764da002fd6448862060522a3cab0238 Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Thu, 11 Dec 2025 16:05:20 -0500 Subject: [PATCH 8/9] Adding unit test files and updating gitignore --- .gitignore | 2 +- data/MutateAssemblies/founder.g.vcf | 22 ++++++++++++++++++++++ data/MutateAssemblies/nonFounder.g.vcf | 18 ++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 data/MutateAssemblies/founder.g.vcf create mode 100644 data/MutateAssemblies/nonFounder.g.vcf diff --git a/.gitignore b/.gitignore index 09cd6ee..772501e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ build/ .idea/jarRepositories.xml .idea/compiler.xml .idea/libraries/ +.idea *.iws *.iml *.ipr @@ -55,7 +56,6 @@ CLAUDE.md ### Project specific ### seq_sim_work/ scripts/align25pairs.sh -data/ ### Pixi environment ### pixi.lock diff --git a/data/MutateAssemblies/founder.g.vcf b/data/MutateAssemblies/founder.g.vcf new file mode 100644 index 0000000..fed224d --- /dev/null +++ b/data/MutateAssemblies/founder.g.vcf @@ -0,0 +1,22 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT loadingTest +chr1 100 . A . . END=150 GT 0/0 +chr1 150 . C G . . END=150 GT 1/1 +chr1 151 . T . . END=200 GT 0/0 +chr1 201 . GGGGG G . . END=205 GT 1/1 +chr1 206 . A . . END=300 GT 0/0 +chr1 301 . G C . . END=301 GT 1/1 +chr1 302 . T . . END=400 GT 0/0 diff --git a/data/MutateAssemblies/nonFounder.g.vcf b/data/MutateAssemblies/nonFounder.g.vcf new file mode 100644 index 0000000..09f9aa0 --- /dev/null +++ b/data/MutateAssemblies/nonFounder.g.vcf @@ -0,0 +1,18 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT loadingTest +chr1 125 . T A . . END=125 GT 1/1 +chr1 175 . C TTTT . . END=175 GT 1/1 +chr1 500 . A T . . END=500 GT 1/1 \ No newline at end of file From d8314a35a62127745c4cf92236fde86ae928f9b5 Mon Sep 17 00:00:00 2001 From: Zack Miller Date: Fri, 12 Dec 2025 13:02:25 -0500 Subject: [PATCH 9/9] Updating version --- build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle.kts b/build.gradle.kts index 47b1b62..96ac0c2 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -4,7 +4,7 @@ plugins { } group = "net.maizegenetics" -version = "0.2.1" +version = "0.2.2" repositories { mavenCentral()