diff --git a/.gitignore b/.gitignore index 09cd6ee..772501e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ build/ .idea/jarRepositories.xml .idea/compiler.xml .idea/libraries/ +.idea *.iws *.iml *.ipr @@ -55,7 +56,6 @@ CLAUDE.md ### Project specific ### seq_sim_work/ scripts/align25pairs.sh -data/ ### Pixi environment ### pixi.lock diff --git a/build.gradle.kts b/build.gradle.kts index 774750a..96ac0c2 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -4,7 +4,7 @@ plugins { } group = "net.maizegenetics" -version = "0.2.1" +version = "0.2.2" repositories { mavenCentral() @@ -15,6 +15,11 @@ dependencies { implementation("org.apache.logging.log4j:log4j-api:2.24.3") implementation("org.apache.logging.log4j:log4j-core:2.24.3") implementation("org.yaml:snakeyaml:2.3") + implementation("org.biokotlin:biokotlin:1.0.0") + implementation("com.google.guava:guava:33.1.0-jre") + implementation("com.github.samtools:htsjdk:4.0.1") + + testImplementation(kotlin("test")) } diff --git a/data/MutateAssemblies/founder.g.vcf b/data/MutateAssemblies/founder.g.vcf new file mode 100644 index 0000000..fed224d --- /dev/null +++ b/data/MutateAssemblies/founder.g.vcf @@ -0,0 +1,22 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT loadingTest +chr1 100 . A . . END=150 GT 0/0 +chr1 150 . C G . . END=150 GT 1/1 +chr1 151 . T . . END=200 GT 0/0 +chr1 201 . GGGGG G . . END=205 GT 1/1 +chr1 206 . A . . END=300 GT 0/0 +chr1 301 . G C . . END=301 GT 1/1 +chr1 302 . T . . END=400 GT 0/0 diff --git a/data/MutateAssemblies/nonFounder.g.vcf b/data/MutateAssemblies/nonFounder.g.vcf new file mode 100644 index 0000000..09f9aa0 --- /dev/null +++ b/data/MutateAssemblies/nonFounder.g.vcf @@ -0,0 +1,18 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT loadingTest +chr1 125 . T A . . END=125 GT 1/1 +chr1 175 . C TTTT . . END=175 GT 1/1 +chr1 500 . A T . . END=500 GT 1/1 \ No newline at end of file diff --git a/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt new file mode 100644 index 0000000..3ce4b50 --- /dev/null +++ b/src/main/kotlin/net/maizegenetics/commands/MutateAssemblies.kt @@ -0,0 +1,333 @@ +package net.maizegenetics.net.maizegenetics.commands + +import com.github.ajalt.clikt.core.CliktCommand +import com.github.ajalt.clikt.parameters.options.option +import com.github.ajalt.clikt.parameters.options.required +import com.github.ajalt.clikt.parameters.types.path +import com.google.common.collect.Range +import com.google.common.collect.RangeMap +import com.google.common.collect.TreeRangeMap +import htsjdk.variant.variantcontext.Allele +import htsjdk.variant.variantcontext.GenotypeBuilder +import htsjdk.variant.variantcontext.VariantContext +import htsjdk.variant.variantcontext.VariantContextBuilder +import htsjdk.variant.variantcontext.writer.Options +import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder +import htsjdk.variant.vcf.VCFFileReader +import htsjdk.variant.vcf.VCFFormatHeaderLine +import htsjdk.variant.vcf.VCFHeader +import htsjdk.variant.vcf.VCFHeaderLine +import htsjdk.variant.vcf.VCFHeaderLineCount +import htsjdk.variant.vcf.VCFHeaderLineType +import htsjdk.variant.vcf.VCFInfoHeaderLine +import java.io.File +import java.util.HashSet +import kotlin.io.path.createDirectories +import kotlin.io.path.exists + + +data class Position(val contig: String, val position: Int) : Comparable { + override fun compareTo(other: Position): Int { + + if (this.contig == other.contig) { + return this.position.compareTo(other.position) + } + + val thisContig = this.contig.replace("chr", "", ignoreCase = true).trim() + val otherContig = other.contig.replace("chr", "", ignoreCase = true).trim() + + return try { + thisContig.toInt() - otherContig.toInt() + } catch (e: NumberFormatException) { + // If we can't convert contigs to an int, then compare the strings + contig.compareTo(other.contig) + } + + } + + override fun toString(): String { + return "$contig:$position" + } +} +data class SimpleVariant(val refStart: Position, val refEnd: Position, + val refAllele: String, val altAllele: String, + val isAddedMutation: Boolean = false) + +@OptIn(kotlin.io.path.ExperimentalPathApi::class) +class MutateAssemblies : CliktCommand(name = "mutate-assemblies") { + + + private val founderGvcf by option( + help = "Founder GVCF to mutate (.gvcf or .g.vcf.gz)" + ).path(mustExist = true, canBeFile = true, canBeDir = false) + .required() + + private val nonFounderGvcf by option( + help = "Non-founder GVCF to pull variants from (.gvcf or .g.vcf.gz)" + ).path(mustExist = true, canBeFile = true, canBeDir = false) + .required() + + + + private val outputDir by option(help = "Output dir") + .path(canBeFile = false, canBeDir = true) + .required() + + override fun run() { + if(!outputDir.exists()) { + outputDir.createDirectories() + } + + introduceMutations(founderGvcf.toFile(), nonFounderGvcf.toFile(), outputDir.toFile()) + + } + + fun introduceMutations(founderGvcf: File, mutationGvcf: File, outputDir: File) { + //walk through the two gvcf files + //From one pull the mutations left + + //Loop through the founderGVCF and build a RangeMap of Position to Variant + val (sampleName, founderVariantMap) = buildFounderVariantMap(founderGvcf) + + addNewVariants(mutationGvcf, founderVariantMap) + + writeMutatedGVCF(outputDir, sampleName, founderVariantMap) + } + + fun buildFounderVariantMap(founderGvcf: File) : Pair> { + //Loop through the founderGVCF and build a RangeMap of Position to Variant + val variantReader = VCFFileReader(founderGvcf, false) + + val iterator = variantReader.iterator() + + val sampleName = variantReader.header.sampleNamesInOrder.first() + val rangeMap = TreeRangeMap.create() + + while(iterator.hasNext()) { + val vc = iterator.next() + + val refChr = vc.contig + val refStart = vc.start + val refEnd = vc.end + + val refAllele = vc.reference.displayString + val altAlleles = vc.alternateAlleles.map { it.displayString } + + rangeMap.put(Range.closed(Position(refChr, refStart), Position(refChr, refEnd)), + SimpleVariant(Position(refChr, refStart), Position(refChr, refEnd), refAllele, altAlleles.joinToString(","))) + + } + + return Pair(sampleName,rangeMap) + } + + fun addNewVariants(mutationGvcf: File, founderVariantMap: RangeMap) { + val variantReader = VCFFileReader(mutationGvcf, false) + + val iterator = variantReader.iterator() + + while(iterator.hasNext()) { + val vc = iterator.next() + + extractVCAndAddToRangeMap(vc, founderVariantMap) + } + } + + fun extractVCAndAddToRangeMap( + vc: VariantContext, + founderVariantMap: RangeMap + ) { + val refChr = vc.contig + val refStart = vc.start + val refEnd = vc.end + + val refAllele = vc.reference.displayString + val altAllele = vc.alternateAlleles.map { it.displayString }.first() + + val variantPosition = Position(refChr, refStart) + + val currentSimpleVariant = + SimpleVariant(Position(refChr, refStart), Position(refChr, refEnd), refAllele, altAllele, true) + + val overlappingVariant = founderVariantMap.get(variantPosition) + + //Skip if refBlock + if(currentSimpleVariant.refAllele.length == 1 && currentSimpleVariant.altAllele == "") { + return + } + + if (isIndel(currentSimpleVariant) && isIndel(overlappingVariant)) { + //skip as it will be tricky/slow to handle + return //TODO figure out a way to handle this well + } + + if (overlappingVariant == null) { + //This is a new variant we can add as it does not overlap with an existing variant + founderVariantMap.put( + Range.closed(Position(refChr, refStart), Position(refChr, refEnd)), + currentSimpleVariant + ) + } else { + //we need to split out the existing and add the new variant + updateOverlappingVariant(founderVariantMap, currentSimpleVariant) + } + } + + fun updateOverlappingVariant(founderVariantMap: RangeMap, variant: SimpleVariant) { + //Get out the overlapping entry + val overlappingEntry = founderVariantMap.getEntry(variant.refStart)?: return + + //split it up based on the new variant + val existingVariant = overlappingEntry.value + + if(existingVariant == variant) { + //same variant, nothing to do + return + } + + //Check to see if the existing Variant is a SNP + if(existingVariant.refStart == existingVariant.refEnd) { + //SNP case, we can just replace it + founderVariantMap.put(Range.closed(variant.refStart, variant.refEnd), variant) + } + else if( existingVariant.refAllele.length == 1 && existingVariant.altAllele == "" //This checks that the existing variant is a refBlock + && variant.refAllele.length == 1 ) { //This means current variant is a SNP or single BP insertion which can be treated like a normal SNP as it only covers 1 ref BP + //This is a refBlock case that fully covers the new variant + val splitVariants = splitRefBlock(existingVariant, variant) + + founderVariantMap.remove(overlappingEntry.key) + for(sv in splitVariants) { + founderVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) + } + } + //We also need to handle a complex edge case where we have an indel overlapping another indel. + // If the new indel is fully covered we can remove the existing, add potentially 2 refBlocks surrounding + //TODO make this work with indel edge cases +// else if( variant.refStart >= existingVariant.refStart && variant.refEnd <= existingVariant.refEnd ) { +// //The new variant is fully contained within the existing variant +// val splitVariants = splitRefBlock(existingVariant, variant) +// +// founderVariantMap.remove(overlappingEntry.key) +// for(sv in splitVariants) { +// founderVariantMap.put(Range.closed(sv.refStart, sv.refEnd), sv) +// } +// } + + + } + + fun splitRefBlock(variantToSplit: SimpleVariant, variantToAdd: SimpleVariant): List { + //check to make sure that the variantToAdd is fully contained within the variantToSplit + require(variantToAdd.refStart >= variantToSplit.refStart && variantToAdd.refEnd <= variantToSplit.refEnd) { + "Variant to add must be fully contained within the variant to split" + } + + val splitVariants = mutableListOf() + //Check to see if the variantToAdd starts at the same position. + if(variantToSplit.refStart != variantToAdd.refStart) { + //We have a left side to create + val leftVariant = SimpleVariant( + refStart = variantToSplit.refStart, + refEnd = Position(variantToAdd.refStart.contig, variantToAdd.refStart.position -1), + refAllele = variantToSplit.refAllele, + altAllele = "", + isAddedMutation = false + ) + splitVariants.add(leftVariant) + } + //Add the new variant + splitVariants.add(variantToAdd) + //Check to see if we have a right side to create + if(variantToSplit.refEnd != variantToAdd.refEnd) { + val rightVariant = SimpleVariant( + refStart = Position(variantToAdd.refEnd.contig, variantToAdd.refEnd.position +1), + refEnd = variantToSplit.refEnd, + refAllele = variantToSplit.refAllele, + altAllele = "", + isAddedMutation = false + ) + splitVariants.add(rightVariant) + } + return splitVariants + } + + fun writeMutatedGVCF(outputDir: File, sampleName: String, founderVariantMap: RangeMap) { + val outputGvcfFile = File(outputDir, "${sampleName}_mutated.g.vcf") + VariantContextWriterBuilder() + .unsetOption(Options.INDEX_ON_THE_FLY) + .setOutputFile(outputGvcfFile) + .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF) + .setOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER) + .build().use { writer -> + + writer.writeHeader(createGenericHeader(listOf(sampleName), emptySet())) + + val sortedVariants = founderVariantMap.asMapOfRanges().toList().sortedBy { it.first.lowerEndpoint() } + + for ((range, variant) in sortedVariants) { + val vcBuilder = VariantContextBuilder() + vcBuilder.chr(variant.refStart.contig) + vcBuilder.start(variant.refStart.position.toLong()) + vcBuilder.stop(variant.refEnd.position.toLong()) + vcBuilder.id(".") + vcBuilder.alleles( + listOf( + Allele.create(variant.refAllele, true), + Allele.create(variant.altAllele, false) + ) + ) + vcBuilder.attribute("END", variant.refEnd.position) + val genotypeBuilder = GenotypeBuilder(sampleName) + val alleles = if (variant.altAllele == "") { + listOf(Allele.create(variant.refAllele, true), Allele.create(variant.refAllele, true)) + } else { + listOf(Allele.create(variant.altAllele, false), Allele.create(variant.altAllele, false)) + } + genotypeBuilder.alleles( + alleles + ) + vcBuilder.genotypes(genotypeBuilder.make()) + + writer.add(vcBuilder.make()) + } + + } + } + + fun createGenericHeader(taxaNames: List, altLines:Set): VCFHeader { + val headerLines = createGenericHeaderLineSet() as MutableSet + headerLines.addAll(altLines) + return VCFHeader(headerLines, taxaNames) + } + + fun createGenericHeaderLineSet(): Set { + val headerLines: MutableSet = HashSet() + headerLines.add(VCFFormatHeaderLine("AD", 3, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")) + headerLines.add( + VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)") + ) + headerLines.add(VCFFormatHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality")) + headerLines.add(VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype")) + headerLines.add( + VCFFormatHeaderLine("PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification") + ) + headerLines.add(VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Total Depth")) + headerLines.add(VCFInfoHeaderLine("NS", 1, VCFHeaderLineType.Integer, "Number of Samples With Data")) + headerLines.add(VCFInfoHeaderLine("AF", 3, VCFHeaderLineType.Integer, "Allele Frequency")) + headerLines.add(VCFInfoHeaderLine("END", 1, VCFHeaderLineType.Integer, "Stop position of the interval")) + // These last 4 are needed for assembly g/hvcfs, but not for reference. I am keeping them in as header + // lines but they will only be added to the data lines if they are present in the VariantContext. + headerLines.add(VCFInfoHeaderLine("ASM_Chr", 1, VCFHeaderLineType.String, "Assembly chromosome")) + headerLines.add(VCFInfoHeaderLine("ASM_Start", 1, VCFHeaderLineType.Integer, "Assembly start position")) + headerLines.add(VCFInfoHeaderLine("ASM_End", 1, VCFHeaderLineType.Integer, "Assembly end position")) + headerLines.add(VCFInfoHeaderLine("ASM_Strand", 1, VCFHeaderLineType.String, "Assembly strand")) + return headerLines + } + + fun isIndel(variant: SimpleVariant?): Boolean { + if(variant == null) return false + return !((variant.refAllele.length == 1 && variant.altAllele.length == 1) || //SNP + (variant.refAllele.length == 1 && variant.altAllele == "")) //RefBlock + } + +} \ No newline at end of file diff --git a/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt new file mode 100644 index 0000000..2bc7198 --- /dev/null +++ b/src/test/kotlin/net/maizegenetics/commands/MutateAssembliesTest.kt @@ -0,0 +1,336 @@ +package net.maizegenetics.commands + +import com.google.common.collect.Range +import com.google.common.collect.RangeMap +import com.google.common.collect.TreeRangeMap +import htsjdk.variant.variantcontext.Allele +import htsjdk.variant.variantcontext.VariantContextBuilder +import htsjdk.variant.vcf.VCFFileReader +import net.maizegenetics.net.maizegenetics.commands.MutateAssemblies +import net.maizegenetics.net.maizegenetics.commands.Position +import net.maizegenetics.net.maizegenetics.commands.SimpleVariant +import java.io.File +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFailsWith + +class MutateAssembliesTest { + + val homeDir = System.getProperty("user.home").replace('\\', '/') + + val outputDir = "$homeDir/temp/seq_sim/mutated_gvcf_test/" + + @Test + fun testBuildFounderVariantMap() { + val mutateAssemblies = MutateAssemblies() + + val founderVariantMap = createSimpleFounderMap() + + //load back in with + val loadedMap = mutateAssemblies.buildFounderVariantMap(File("data/MutateAssemblies/founder.g.vcf")) + + //Should match the original map + assertEquals(founderVariantMap.asMapOfRanges().size, loadedMap.second.asMapOfRanges().size) + for(entry in founderVariantMap.asMapOfRanges().entries) { + val loadedVariant = loadedMap.second.get(entry.key.lowerEndpoint()) + assertEquals(entry.value, loadedVariant) + } + } + + @Test + fun testAddNewVariants() { + val mutateAssemblies = MutateAssemblies() + + val (sampleName,founderVariantMap) = mutateAssemblies.buildFounderVariantMap(File("data/MutateAssemblies/founder.g.vcf")) + + mutateAssemblies.addNewVariants(File("data/MutateAssemblies/nonFounder.g.vcf"), founderVariantMap) + + //This should match the founderMap but we have new variants + assertEquals(11, founderVariantMap.asMapOfRanges().size) + + //Here are the three additions: + //chr1 125 . T A . . END=125 GT 1/1 + //chr1 175 . C TTTT . . END=175 GT 1/1 + //chr1 500 . A T . . END=500 GT 1/1 + val variant125 = founderVariantMap.get(Position("chr1", 125)) + assertEquals(SimpleVariant(Position("chr1", 125), Position("chr1", + 125), "T", "A",true), variant125) + + val variant175 = founderVariantMap.get(Position("chr1", 175)) + assertEquals(SimpleVariant(Position("chr1", 175), Position("chr1", 175), "C", "TTTT", true), variant175) + + val variant500 = founderVariantMap.get(Position("chr1", 500)) + assertEquals(SimpleVariant(Position("chr1", 500), Position("chr1", 500), "A", "T", true), variant500) + + + + } + + @Test + fun splitRefBlockTest() { + val mutateAssemblies = MutateAssemblies() + + //Test non overlapping variants + val variantToSplit = SimpleVariant(Position("chr1", 100), Position("chr1", 200), "A", "") + + val nonOverlappingVariant = SimpleVariant(Position("chr1", 250), Position("chr1", 250), "C", "G") + + assertFailsWith { + mutateAssemblies.splitRefBlock(variantToSplit, nonOverlappingVariant) + } + + //check partially overlapping variants + val partialOverlapLeft = SimpleVariant(Position("chr1", 50), Position("chr1", 150), "T", "G") + assertFailsWith { + mutateAssemblies.splitRefBlock(variantToSplit, partialOverlapLeft) + } + + val partialOverlapRight = SimpleVariant(Position("chr1", 150), Position("chr1", 250), "T", "G") + assertFailsWith { + mutateAssemblies.splitRefBlock(variantToSplit, partialOverlapRight) + } + + val variantToAdd = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "T", "G") + val splitVariants = mutateAssemblies.splitRefBlock(variantToSplit, variantToAdd) + assert(splitVariants.size == 3) + assert(splitVariants[0].refStart == Position("chr1", 100)) + assert(splitVariants[0].refEnd == Position("chr1", 149)) + assert(splitVariants[1] == variantToAdd) + assert(splitVariants[2].refStart == Position("chr1", 151)) + assert(splitVariants[2].refEnd == Position("chr1", 200)) + + //check boundaries + val leftBoundaryVariant = SimpleVariant(Position("chr1", 100), Position("chr1", 100), "A", "G") + val splitVariantsLeft = mutateAssemblies.splitRefBlock(variantToSplit, leftBoundaryVariant) + assert(splitVariantsLeft.size == 2) + assert(splitVariantsLeft[0] == leftBoundaryVariant) + assert(splitVariantsLeft[1].refStart == Position("chr1", 101)) + assert(splitVariantsLeft[1].refEnd == Position("chr1", 200)) + + val rightBoundaryVariant = SimpleVariant(Position("chr1", 200), Position("chr1", 200), "A", "G") + val splitVariantsRight = mutateAssemblies.splitRefBlock(variantToSplit, rightBoundaryVariant) + assert(splitVariantsRight.size == 2) + assert(splitVariantsRight[0].refStart == Position("chr1", 100)) + assert(splitVariantsRight[0].refEnd == Position("chr1", 199)) + assert(splitVariantsRight[1] == rightBoundaryVariant) + + } + + @Test + fun updateOverlappingVariantTest() { + //Need to test to make sure that the removing and adding of variants is working as expected + val mutateAssemblies = MutateAssemblies() + + //need a range map to work with + val founderVariantMap = createSimpleFounderMap() + + val initialSize = founderVariantMap.asMapOfRanges().size + + //check a variant outside of the existing ranges + val nonOverlappingVariant = SimpleVariant(Position("chr1", 500), Position("chr1", 500), "C", "G") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, nonOverlappingVariant) + + assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + + //Try other chromosomes + val nonOverlappingVariantOtherChr = SimpleVariant(Position("chr2", 150), Position("chr2", 150), "C", "G") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, nonOverlappingVariantOtherChr) + assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + + + //Update the SNP at position 150 with the same variant + val sameSNPVariant = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "G") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, sameSNPVariant) + assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + + //Update the SNP at position 150 + val overlappingSNPVariant = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "T") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, overlappingSNPVariant) + assertEquals(initialSize, founderVariantMap.asMapOfRanges().size) + val retrievedVariant = founderVariantMap.get(Position("chr1", 150)) + assertEquals(overlappingSNPVariant, retrievedVariant) + + + //add a SNP at position 175 + val newSNPVariant = SimpleVariant(Position("chr1", 175), Position("chr1", 175), "A", "T") + mutateAssemblies.updateOverlappingVariant(founderVariantMap, newSNPVariant) + assertEquals(initialSize + 2, founderVariantMap.asMapOfRanges().size) + val retrievedNewVariant = founderVariantMap.get(Position("chr1", 175)) + assertEquals(newSNPVariant, retrievedNewVariant) + //Check to see if the surrounding refBlocks were updated correctly + val leftRefBlock = founderVariantMap.get(Position("chr1", 174)) + assertEquals(SimpleVariant(Position("chr1", 151), Position("chr1", 174), "T", ""), leftRefBlock) + val rightRefBlock = founderVariantMap.get(Position("chr1", 176)) + assertEquals(SimpleVariant(Position("chr1", 176), Position("chr1", 200), "T", ""), rightRefBlock) + + + //Leaving this in for now as I may want to re-implement this functionality later +// //add an indel that is fully contained within an existing indel +// val newIndelVariant = SimpleVariant(Position("chr1", 405), Position("chr1", 407), "GGG", "G") +// mutateAssemblies.updateOverlappingVariant(founderVariantMap, newIndelVariant) +// assertEquals(initialSize, founderVariantMap.asMapOfRanges().size)//Should stay the same size +// //The first ref block should be extended +// val firstRefBlock = founderVariantMap.get(Position("chr1", 401)) +// assertEquals(SimpleVariant(Position("chr1", 302), Position("chr1", 404), "T", ""), firstRefBlock) +// //The new indel should be present +// val retrievedIndel = founderVariantMap.get(Position("chr1", 405)) +// assertEquals(newIndelVariant, retrievedIndel) +// //The last ref block should be extended +// val lastRefBlock = founderVariantMap.get(Position("chr1", 408)) +// assertEquals(SimpleVariant(Position("chr1", 408), Position("chr1", 500), "G", ""), lastRefBlock) + + } + + @Test + fun testWriteMutatedGVCF() { + val mutateAssemblies = MutateAssemblies() + + + File(outputDir).mkdirs() + + //need a range map to work with + val founderVariantMap = createSimpleFounderMap() + + mutateAssemblies.writeMutatedGVCF(File(outputDir), "testSample", founderVariantMap) + + //load in the GVCF and check to make sure the variants match + VCFFileReader(File("${outputDir}testSample_mutated.g.vcf"), false).use { vcfReader -> + val variants = vcfReader.iterator().asSequence().toList() + + //should be 7 variants + assertEquals(7, variants.size) + + //check a few variants + val variant150 = variants.find { it.contig == "chr1" && it.start == 150 } + assertEquals("G", variant150!!.genotypes.get(0).alleles[0].displayString) + + val variant301 = variants.find { it.contig == "chr1" && it.start == 301 } + assertEquals("C", variant301!!.genotypes.get(0).alleles[0].displayString) + + //check one of the indels position 201 + val variant201 = variants.find { it.contig == "chr1" && it.start == 201 } + assertEquals("GGGGG", variant201!!.reference.baseString) + assertEquals("G", variant201!!.genotypes.get(0).alleles[0].displayString) + + } + + } + + + @Test + fun testExtractVCAndAddToRangeMap() { + val mutateAssemblies = MutateAssemblies() + + //need a range map to work with + val founderVariantMap = createSimpleFounderMap() + + val testVariantContextSNPBuilder = VariantContextBuilder() + .chr("chr1") + .start(150) + .stop(150) + .alleles(listOf(Allele.create("C", true), Allele.create("A", false))).make() + + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNPBuilder, founderVariantMap) + + //Size should stay the same, the variant should be different + assertEquals(7, founderVariantMap.asMapOfRanges().size) + val variant150 = founderVariantMap.get(Position("chr1", 150)) + assertEquals("A", variant150!!.altAllele) + + //Split a ref block with a SNP + val testVariantContextSNP2Builder = VariantContextBuilder() + .chr("chr1") + .start(175) + .stop(175) + .alleles(listOf(Allele.create("A", true), Allele.create("T", false))).make() + + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNP2Builder, founderVariantMap) + //Size should increase by 2, 1 for new SNP and 1 for trailing ref block + assertEquals(9, founderVariantMap.asMapOfRanges().size) + val variant175 = founderVariantMap.get(Position("chr1", 175)) + assertEquals("T", variant175!!.altAllele) + + + //Introduce a RefBlock overlapping a SNP This should skip + val testVariantContextRefBlockBuilder = VariantContextBuilder() + .chr("chr1") + .start(150) + .stop(200) + .alleles(listOf(Allele.create("C", true), Allele.create("", false))).make() + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextRefBlockBuilder, founderVariantMap) + //Size should keep same size as we skip introducing a ref blocks + assertEquals(9, founderVariantMap.asMapOfRanges().size) + + //add in a completely new variant + val testVariantContextNewSNPBuilder = VariantContextBuilder() + .chr("chr1") + .start(500) + .stop(500) + .alleles(listOf(Allele.create("A", true), Allele.create("G", false))).make() + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextNewSNPBuilder, founderVariantMap) + //Size should increase by 2, 1 for new SNP and 1 for trailing ref block + assertEquals(10, founderVariantMap.asMapOfRanges().size) + val variant250 = founderVariantMap.get(Position("chr1", 500)) + assertEquals("G", variant250!!.altAllele) + + //Try to add an overlapping indel + val testVariantContextIndelBuilder = VariantContextBuilder() + .chr("chr1") + .start(201) + .stop(203) + .alleles(listOf(Allele.create("GGG", true), Allele.create("G", false))).make() + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextIndelBuilder, founderVariantMap) + //Size should stay the same + assertEquals(10, founderVariantMap.asMapOfRanges().size) + val variant201 = founderVariantMap.get(Position("chr1", 201)) + assertEquals("G", variant201!!.altAllele) + + //Try to add a snp at an indel... this should not add anything + val testVariantContextSNPAtIndelBuilder = VariantContextBuilder() + .chr("chr1") + .start(202) + .stop(202) + .alleles(listOf(Allele.create("G", true), Allele.create("A", false))).make() + mutateAssemblies.extractVCAndAddToRangeMap(testVariantContextSNPAtIndelBuilder, founderVariantMap) + //Size should stay the same + assertEquals(10, founderVariantMap.asMapOfRanges().size) + val variant202 = founderVariantMap.get(Position("chr1", 202)) + assertEquals("G", variant202!!.altAllele) + + + + } + + @Test + fun testIsIndel() { + val mutateAssemblies = MutateAssemblies() + + val refBlockVariant = SimpleVariant(Position("chr1", 100), Position("chr1", 150), "A", "") + assert(!mutateAssemblies.isIndel(refBlockVariant)) + + val snpVariant = SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "G") + assert(!mutateAssemblies.isIndel(snpVariant)) + + val insertionVariant = SimpleVariant(Position("chr1", 200), Position("chr1", 200), "A", "AGG") + assert(mutateAssemblies.isIndel(insertionVariant)) + + val deletionVariant = SimpleVariant(Position("chr1", 201), Position("chr1", 205), "GGGGG", "G") + assert(mutateAssemblies.isIndel(deletionVariant)) + } + + private fun createSimpleFounderMap(): RangeMap { + val rangeMap = TreeRangeMap.create() + + rangeMap.put(Range.closed(Position("chr1", 100), Position("chr1", 150)), SimpleVariant(Position("chr1", 100), Position("chr1", 150), "A", "")) + rangeMap.put(Range.closed(Position("chr1", 150), Position("chr1", 150)), SimpleVariant(Position("chr1", 150), Position("chr1", 150), "C", "G")) + rangeMap.put(Range.closed(Position("chr1", 151), Position("chr1", 200)), SimpleVariant(Position("chr1", 151), Position("chr1", 200), "T", "")) + rangeMap.put(Range.closed(Position("chr1", 201), Position("chr1", 205)), SimpleVariant(Position("chr1", 201), Position("chr1", 205), "GGGGG", "G")) + rangeMap.put(Range.closed(Position("chr1", 206), Position("chr1", 300)), SimpleVariant(Position("chr1", 206), Position("chr1", 300), "A", "")) + rangeMap.put(Range.closed(Position("chr1", 301), Position("chr1", 301)), SimpleVariant(Position("chr1", 301), Position("chr1", 301), "G", "C")) + rangeMap.put(Range.closed(Position("chr1", 302), Position("chr1", 400)), SimpleVariant(Position("chr1", 302), Position("chr1", 400), "T", "")) + +// rangeMap.put(Range.closed(Position("chr1",401), Position("chr1",410)), SimpleVariant(Position("chr1",401), Position("chr1",410), "GGGGGGGGGG", "G")) +// rangeMap.put(Range.closed(Position("chr1",411), Position("chr1",500)), SimpleVariant(Position("chr1",411), Position("chr1",500), "C", "")) + return rangeMap + } +} \ No newline at end of file