diff --git a/build.gradle.kts b/build.gradle.kts index 867b1fa..68546e7 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -4,7 +4,7 @@ plugins { } group = "net.maizegenetics" -version = "0.2.3" +version = "0.2.4" repositories { mavenCentral() diff --git a/data/MutateAssemblies/founder.g.vcf b/data/MutateAssemblies/base.g.vcf similarity index 100% rename from data/MutateAssemblies/founder.g.vcf rename to data/MutateAssemblies/base.g.vcf diff --git a/data/MutateAssemblies/nonFounder.g.vcf b/data/MutateAssemblies/mutationDonor.g.vcf similarity index 100% rename from data/MutateAssemblies/nonFounder.g.vcf rename to data/MutateAssemblies/mutationDonor.g.vcf diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt index 3ce4b50..2230077 100644 --- a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -57,13 +57,13 @@ data class SimpleVariant(val refStart: Position, val refEnd: Position, class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { - private val founderGvcf by option( - help = "Founder GVCF to mutate (.gvcf or .g.vcf.gz)" + private val baseGvcf by option( + help = "Base GVCF to mutate (.gvcf or .g.vcf.gz)" ).path(mustExist = true, canBeFile = true, canBeDir = false) .required() - private val nonFounderGvcf by option( - help = "Non-founder GVCF to pull variants from (.gvcf or .g.vcf.gz)" + private val mutationDonorGvcf by option( + help = "GVCF to pull variants from (.gvcf or .g.vcf.gz)" ).path(mustExist = true, canBeFile = true, canBeDir = false) .required() @@ -78,25 +78,25 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { outputDir.createDirectories() } - introduceMutations(founderGvcf.toFile(), nonFounderGvcf.toFile(), outputDir.toFile()) + introduceMutations(baseGvcf.toFile(), mutationDonorGvcf.toFile(), outputDir.toFile()) } - fun introduceMutations(founderGvcf: File, mutationGvcf: File, outputDir: File) { + fun introduceMutations(baseGvcf: File, mutationDonorGvcf: File, outputDir: File) { //walk through the two gvcf files //From one pull the mutations left //Loop through the founderGVCF and build a RangeMap of Position to Variant - val (sampleName, founderVariantMap) = buildFounderVariantMap(founderGvcf) + val (sampleName, baseVariantMap) = buildBaseVariantMap(baseGvcf) - addNewVariants(mutationGvcf, founderVariantMap) + addNewVariants(mutationDonorGvcf, baseVariantMap) - writeMutatedGVCF(outputDir, sampleName, founderVariantMap) + writeMutatedGVCF(outputDir, sampleName, baseVariantMap) } - fun buildFounderVariantMap(founderGvcf: File) : Pair> { + fun buildBaseVariantMap(baseGvcf: File) : Pair> { //Loop through the founderGVCF and build a RangeMap of Position to Variant - val variantReader = VCFFileReader(founderGvcf, false) + val variantReader = VCFFileReader(baseGvcf, false) val iterator = variantReader.iterator() @@ -121,21 +121,21 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { return Pair(sampleName,rangeMap) } - fun addNewVariants(mutationGvcf: File, founderVariantMap: RangeMap) { - val variantReader = VCFFileReader(mutationGvcf, false) + fun addNewVariants(mutationDonorGvcf: File, baseVariantMap: RangeMap) { + val variantReader = VCFFileReader(mutationDonorGvcf, false) val iterator = variantReader.iterator() while(iterator.hasNext()) { val vc = iterator.next() - extractVCAndAddToRangeMap(vc, founderVariantMap) + extractVCAndAddToRangeMap(vc, baseVariantMap) } } fun extractVCAndAddToRangeMap( vc: VariantContext, - founderVariantMap: RangeMap + baseVariantMap: RangeMap ) { val refChr = vc.contig val refStart = vc.start @@ -149,7 +149,7 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { val currentSimpleVariant = SimpleVariant(Position(refChr, refStart), Position(refChr, refEnd), refAllele, altAllele, true) - val overlappingVariant = founderVariantMap.get(variantPosition) + val overlappingVariant = baseVariantMap.get(variantPosition) //Skip if refBlock if(currentSimpleVariant.refAllele.length == 1 && currentSimpleVariant.altAllele == "") { @@ -163,19 +163,19 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { if (overlappingVariant == null) { //This is a new variant we can add as it does not overlap with an existing variant - founderVariantMap.put( + baseVariantMap.put( Range.closed(Position(refChr, refStart), Position(refChr, refEnd)), currentSimpleVariant ) } else { //we need to split out the existing and add the new variant - updateOverlappingVariant(founderVariantMap, currentSimpleVariant) + updateOverlappingVariant(baseVariantMap, currentSimpleVariant) } } - fun updateOverlappingVariant(founderVariantMap: RangeMap, variant: SimpleVariant) { + fun updateOverlappingVariant(baseVariantMap: RangeMap, variant: SimpleVariant) { //Get out the overlapping entry - val overlappingEntry = founderVariantMap.getEntry(variant.refStart)?: return + val overlappingEntry = baseVariantMap.getEntry(variant.refStart)?: return //split it up based on the new variant val existingVariant = overlappingEntry.value @@ -188,16 +188,16 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { //Check to see if the existing Variant is a SNP if(existingVariant.refStart == existingVariant.refEnd) { //SNP case, we can just replace it - founderVariantMap.put(Range.closed(variant.refStart, variant.refEnd), variant) + baseVariantMap.put(Range.closed(variant.refStart, variant.refEnd), variant) } else if( existingVariant.refAllele.length == 1 && existingVariant.altAllele == "" //This checks that the existing variant is a refBlock && variant.refAllele.length == 1 ) { //This means current variant is a SNP or single BP insertion which can be treated like a normal SNP as it only covers 1 ref BP //This is a refBlock case that fully covers the new variant val splitVariants = splitRefBlock(existingVariant, variant) - founderVariantMap.remove(overlappingEntry.key) + baseVariantMap.remove(overlappingEntry.key) for(sv in splitVariants) { - founderVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) + baseVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) } } //We also need to handle a complex edge case where we have an indel overlapping another indel. @@ -251,7 +251,7 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { return splitVariants } - fun writeMutatedGVCF(outputDir: File, sampleName: String, founderVariantMap: RangeMap) { + fun writeMutatedGVCF(outputDir: File, sampleName: String, baseVariantMap: RangeMap) { val outputGvcfFile = File(outputDir, "${sampleName}_mutated.g.vcf") VariantContextWriterBuilder() .unsetOption(Options.INDEX_ON_THE_FLY) @@ -262,7 +262,7 @@ class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { writer.writeHeader(createGenericHeader(listOf(sampleName), emptySet())) - val sortedVariants = founderVariantMap.asMapOfRanges().toList().sortedBy { it.first.lowerEndpoint() } + val sortedVariants = baseVariantMap.asMapOfRanges().toList().sortedBy { it.first.lowerEndpoint() } for ((range, variant) in sortedVariants) { val vcBuilder = VariantContextBuilder() diff --git a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt index 2bc7198..40862c4 100644 --- a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt +++ b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt @@ -21,17 +21,17 @@ class MutateAssembliesTest { val outputDir = "$homeDir/temp/seq_sim/mutated_gvcf_test/" @Test - fun testBuildFounderVariantMap() { + fun testBuildBaseVariantMap() { val mutateAssemblies = MutateAssemblies() - val founderVariantMap = createSimpleFounderMap() + val baseVariantMap = createSimpleBaseVariantMap() //load back in with - val loadedMap = mutateAssemblies.buildFounderVariantMap(File("data/MutateAssemblies/founder.g.vcf")) + val loadedMap = mutateAssemblies.buildBaseVariantMap(File("data/MutateAssemblies/base.g.vcf")) //Should match the original map - assertEquals(founderVariantMap.asMapOfRanges().size, loadedMap.second.asMapOfRanges().size) - for(entry in founderVariantMap.asMapOfRanges().entries) { + assertEquals(baseVariantMap.asMapOfRanges().size, loadedMap.second.asMapOfRanges().size) + for(entry in baseVariantMap.asMapOfRanges().entries) { val loadedVariant = loadedMap.second.get(entry.key.lowerEndpoint()) assertEquals(entry.value, loadedVariant) } @@ -41,25 +41,25 @@ class MutateAssembliesTest { fun testAddNewVariants() { val mutateAssemblies = MutateAssemblies() - val (sampleName,founderVariantMap) = mutateAssemblies.buildFounderVariantMap(File("data/MutateAssemblies/founder.g.vcf")) + val (sampleName,baseVariantMap) = mutateAssemblies.buildBaseVariantMap(File("data/MutateAssemblies/base.g.vcf")) - mutateAssemblies.addNewVariants(File("data/MutateAssemblies/nonFounder.g.vcf"), founderVariantMap) + mutateAssemblies.addNewVariants(File("data/MutateAssemblies/mutationDonor.g.vcf"), baseVariantMap) - //This should match the founderMap but we have new variants - assertEquals(11, founderVariantMap.asMapOfRanges().size) + //This should match the BaseMap but we have new variants + assertEquals(11, baseVariantMap.asMapOfRanges().size) //Here are the three additions: //chr1 125 . T A . . END=125 GT 1/1 //chr1 175 . C TTTT . . END=175 GT 1/1 //chr1 500 . A T . . END=500 GT 1/1 - val variant125 = founderVariantMap.get(Position("chr1", 125)) + val variant125 = baseVariantMap.get(Position("chr1", 125)) assertEquals(SimpleVariant(Position("chr1", 125), Position("chr1", 125), "T", "A",true), variant125) - val variant175 = founderVariantMap.get(Position("chr1", 175)) + val variant175 = baseVariantMap.get(Position("chr1", 175)) assertEquals(SimpleVariant(Position("chr1", 175), Position("chr1", 175), "C", "TTTT", true), variant175) - val variant500 = founderVariantMap.get(Position("chr1", 500)) + val variant500 = baseVariantMap.get(Position("chr1", 500)) assertEquals(SimpleVariant(Position("chr1", 500), Position("chr1", 500), "A", "T", true), variant500) @@ -122,45 +122,45 @@ class MutateAssembliesTest { val mutateAssemblies = MutateAssemblies() //need a range map to work with - val founderVariantMap = createSimpleFounderMap() + val baseVariantMap = createSimpleBaseVariantMap() - val initialSize = founderVariantMap.asMapOfRanges().size + val initialSize = baseVariantMap.asMapOfRanges().size //check a variant outside of the existing ranges val nonOverlappingVariant = SimpleVariant(Position("chr1", 500), Position("chr1", 500), "C", "G") - mutateAssemblies.updateOverlappingVariant(founderVariantMap, nonOverlappingVariant) + mutateAssemblies.updateOverlappingVariant(baseVariantMap, nonOverlappingVariant) - assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + assertEquals(initialSize, baseVariantMap.asMapOfRanges().size) //Try other chromosomes val nonOverlappingVariantOtherChr = SimpleVariant(Position("chr2", 150), Position("chr2", 150), "C", "G") - mutateAssemblies.updateOverlappingVariant(founderVariantMap, nonOverlappingVariantOtherChr) - assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + mutateAssemblies.updateOverlappingVariant(baseVariantMap, nonOverlappingVariantOtherChr) + assertEquals(initialSize, baseVariantMap.asMapOfRanges().size) //Update the SNP at position 150 with the same variant val sameSNPVariant = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "G") - mutateAssemblies.updateOverlappingVariant(founderVariantMap, sameSNPVariant) - assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + mutateAssemblies.updateOverlappingVariant(baseVariantMap, sameSNPVariant) + assertEquals(initialSize, baseVariantMap.asMapOfRanges().size) //Update the SNP at position 150 val overlappingSNPVariant = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "T") - mutateAssemblies.updateOverlappingVariant(founderVariantMap, overlappingSNPVariant) - assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) - val retrievedVariant = founderVariantMap.get(Position("chr1", 150)) + mutateAssemblies.updateOverlappingVariant(baseVariantMap, overlappingSNPVariant) + assertEquals(initialSize, baseVariantMap.asMapOfRanges().size) + val retrievedVariant = baseVariantMap.get(Position("chr1", 150)) assertEquals(overlappingSNPVariant, retrievedVariant) //add a SNP at position 175 val newSNPVariant = SimpleVariant(Position("chr1", 175), Position("chr1", 175), "A", "T") - mutateAssemblies.updateOverlappingVariant(founderVariantMap, newSNPVariant) - assertEquals(initialSize + 2, founderVariantMap.asMapOfRanges().size) - val retrievedNewVariant = founderVariantMap.get(Position("chr1", 175)) + mutateAssemblies.updateOverlappingVariant(baseVariantMap, newSNPVariant) + assertEquals(initialSize + 2, baseVariantMap.asMapOfRanges().size) + val retrievedNewVariant = baseVariantMap.get(Position("chr1", 175)) assertEquals(newSNPVariant, retrievedNewVariant) //Check to see if the surrounding refBlocks were updated correctly - val leftRefBlock = founderVariantMap.get(Position("chr1", 174)) + val leftRefBlock = baseVariantMap.get(Position("chr1", 174)) assertEquals(SimpleVariant(Position("chr1", 151), Position("chr1", 174), "T", ""), leftRefBlock) - val rightRefBlock = founderVariantMap.get(Position("chr1", 176)) + val rightRefBlock = baseVariantMap.get(Position("chr1", 176)) assertEquals(SimpleVariant(Position("chr1", 176), Position("chr1", 200), "T", ""), rightRefBlock) @@ -189,9 +189,9 @@ class MutateAssembliesTest { File(outputDir).mkdirs() //need a range map to work with - val founderVariantMap = createSimpleFounderMap() + val baseVariantMap = createSimpleBaseVariantMap() - mutateAssemblies.writeMutatedGVCF(File(outputDir), "testSample", founderVariantMap) + mutateAssemblies.writeMutatedGVCF(File(outputDir), "testSample", baseVariantMap) //load in the GVCF and check to make sure the variants match VCFFileReader(File("${outputDir}testSample_mutated.g.vcf"), false).use { vcfReader -> @@ -222,7 +222,7 @@ class MutateAssembliesTest { val mutateAssemblies = MutateAssemblies() //need a range map to work with - val founderVariantMap = createSimpleFounderMap() + val baseVariantMap = createSimpleBaseVariantMap() val testVariantContextSNPBuilder = VariantContextBuilder() .chr("chr1") @@ -230,11 +230,11 @@ class MutateAssembliesTest { .stop(150) .alleles(listOf(Allele.create("C", true), Allele.create("A", false))).make() - mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNPBuilder, founderVariantMap) + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNPBuilder, baseVariantMap) //Size should stay the same, the variant should be different - assertEquals(7, founderVariantMap.asMapOfRanges().size) - val variant150 = founderVariantMap.get(Position("chr1", 150)) + assertEquals(7, baseVariantMap.asMapOfRanges().size) + val variant150 = baseVariantMap.get(Position("chr1", 150)) assertEquals("A", variant150!!.altAllele) //Split a ref block with a SNP @@ -244,10 +244,10 @@ class MutateAssembliesTest { .stop(175) .alleles(listOf(Allele.create("A", true), Allele.create("T", false))).make() - mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNP2Builder, founderVariantMap) + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNP2Builder, baseVariantMap) //Size should increase by 2, 1 for new SNP and 1 for trailing ref block - assertEquals(9, founderVariantMap.asMapOfRanges().size) - val variant175 = founderVariantMap.get(Position("chr1", 175)) + assertEquals(9, baseVariantMap.asMapOfRanges().size) + val variant175 = baseVariantMap.get(Position("chr1", 175)) assertEquals("T", variant175!!.altAllele) @@ -257,9 +257,9 @@ class MutateAssembliesTest { .start(150) .stop(200) .alleles(listOf(Allele.create("C", true), Allele.create("", false))).make() - mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextRefBlockBuilder, founderVariantMap) + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextRefBlockBuilder, baseVariantMap) //Size should keep same size as we skip introducing a ref blocks - assertEquals(9, founderVariantMap.asMapOfRanges().size) + assertEquals(9, baseVariantMap.asMapOfRanges().size) //add in a completely new variant val testVariantContextNewSNPBuilder = VariantContextBuilder() @@ -267,10 +267,10 @@ class MutateAssembliesTest { .start(500) .stop(500) .alleles(listOf(Allele.create("A", true), Allele.create("G", false))).make() - mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextNewSNPBuilder, founderVariantMap) + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextNewSNPBuilder, baseVariantMap) //Size should increase by 2, 1 for new SNP and 1 for trailing ref block - assertEquals(10, founderVariantMap.asMapOfRanges().size) - val variant250 = founderVariantMap.get(Position("chr1", 500)) + assertEquals(10, baseVariantMap.asMapOfRanges().size) + val variant250 = baseVariantMap.get(Position("chr1", 500)) assertEquals("G", variant250!!.altAllele) //Try to add an overlapping indel @@ -279,10 +279,10 @@ class MutateAssembliesTest { .start(201) .stop(203) .alleles(listOf(Allele.create("GGG", true), Allele.create("G", false))).make() - mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextIndelBuilder, founderVariantMap) + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextIndelBuilder, baseVariantMap) //Size should stay the same - assertEquals(10, founderVariantMap.asMapOfRanges().size) - val variant201 = founderVariantMap.get(Position("chr1", 201)) + assertEquals(10, baseVariantMap.asMapOfRanges().size) + val variant201 = baseVariantMap.get(Position("chr1", 201)) assertEquals("G", variant201!!.altAllele) //Try to add a snp at an indel... this should not add anything @@ -291,10 +291,10 @@ class MutateAssembliesTest { .start(202) .stop(202) .alleles(listOf(Allele.create("G", true), Allele.create("A", false))).make() - mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNPAtIndelBuilder, founderVariantMap) + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNPAtIndelBuilder, baseVariantMap) //Size should stay the same - assertEquals(10, founderVariantMap.asMapOfRanges().size) - val variant202 = founderVariantMap.get(Position("chr1", 202)) + assertEquals(10, baseVariantMap.asMapOfRanges().size) + val variant202 = baseVariantMap.get(Position("chr1", 202)) assertEquals("G", variant202!!.altAllele) @@ -318,7 +318,7 @@ class MutateAssembliesTest { assert(mutateAssemblies.isIndel(deletionVariant)) } - private fun createSimpleFounderMap(): RangeMap { + private fun createSimpleBaseVariantMap(): RangeMap { val rangeMap = TreeRangeMap.create() rangeMap.put(Range.closed(Position("chr1", 100), Position("chr1", 150)), SimpleVariant(Position("chr1", 100), Position("chr1", 150), "A", ""))