From fc16b6060f10fffd24da686b7ca5d5042498dfd4 Mon Sep 17 00:00:00 2001 From: Brandon Date: Tue, 16 Dec 2025 14:42:27 -0600 Subject: [PATCH 1/8] Remove all Gradle calls --- build.gradle.kts | 2 +- .../net/maizegenetics/commands/Orchestrate.kt | 112 ++---------------- 2 files changed, 11 insertions(+), 103 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 46128a6..6f64512 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -4,7 +4,7 @@ plugins { } group = "net.maizegenetics" -version = "0.2.5" +version = "0.2.6" repositories { mavenCentral() diff --git a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt index 7e53ddc..6b6ae43 100644 --- a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt +++ b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt @@ -8,11 +8,9 @@ import com.github.ajalt.clikt.parameters.options.required import com.github.ajalt.clikt.parameters.types.path import net.maizegenetics.Constants import net.maizegenetics.utils.LoggingUtils -import net.maizegenetics.utils.ProcessRunner import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.Logger import org.yaml.snakeyaml.Yaml -import java.io.File import java.nio.file.Path import kotlin.io.path.* import kotlin.system.exitProcess @@ -402,7 +400,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.align_assemblies.output?.let { Path.of(it) } val args = buildList { - add("align-assemblies") add("--work-dir=${workDir}") add("--ref-gff=${config.align_assemblies.ref_gff}") add("--ref-fasta=${config.align_assemblies.ref_fasta}") @@ -415,15 +412,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("align-assemblies failed with exit code $exitCode") - } + AlignAssemblies().parse(args) // Get output path (use custom or default) val outputBase = customOutput ?: workDir.resolve("output").resolve("01_anchorwave_results") @@ -480,7 +469,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.maf_to_gvcf.output?.let { Path.of(it) } val args = buildList { - add("maf-to-gvcf") add("--work-dir=${workDir}") add("--reference-file=${refFasta}") add("--maf-file=${mafInput}") @@ -492,15 +480,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("maf-to-gvcf failed with exit code $exitCode") - } + MafToGvcf().parse(args) // Get output directory (use custom or default) gvcfOutputDir = customOutput ?: workDir.resolve("output").resolve("02_gvcf_results") @@ -547,7 +527,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.downsample_gvcf.output?.let { Path.of(it) } val args = buildList { - add("downsample-gvcf") add("--work-dir=${workDir}") add("--gvcf-dir=${gvcfInput}") if (config.downsample_gvcf.ignore_contig != null) { @@ -570,15 +549,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("downsample-gvcf failed with exit code $exitCode") - } + DownsampleGvcf().parse(args) // Get output directory (use custom or default) downsampledGvcfOutputDir = customOutput ?: workDir.resolve("output").resolve("03_downsample_results") @@ -628,7 +599,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.convert_to_fasta.output?.let { Path.of(it) } val args = buildList { - add("convert-to-fasta") add("--work-dir=${workDir}") add("--gvcf-file=${gvcfInput}") add("--ref-fasta=${refFasta}") @@ -643,15 +613,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("convert-to-fasta failed with exit code $exitCode") - } + ConvertToFasta().parse(args) // Get output directory for downstream use (use custom or default) fastaOutputDir = customOutput ?: workDir.resolve("output").resolve("04_fasta_results") @@ -751,7 +713,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.align_mutated_assemblies.output?.let { Path.of(it) } val args = buildList { - add("align-mutated-assemblies") add("--work-dir=${workDir}") add("--ref-gff=${step5RefGff}") add("--ref-fasta=${step5RefFasta}") @@ -764,15 +725,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("align-mutated-assemblies failed with exit code $exitCode") - } + AlignMutatedAssemblies().parse(args) // Save the mutated reference FASTA for use in step 6 mutatedRefFasta = step5RefFasta @@ -826,7 +779,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.pick_crossovers.output?.let { Path.of(it) } val args = buildList { - add("pick-crossovers") add("--work-dir=${workDir}") add("--ref-fasta=${pickCrossoversRefFasta}") add("--assembly-list=${config.pick_crossovers.assembly_list}") @@ -835,15 +787,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("pick-crossovers failed with exit code $exitCode") - } + PickCrossovers().parse(args) // Get output directory (use custom or default) refkeyOutputDir = customOutput ?: workDir.resolve("output").resolve("06_crossovers_results") @@ -889,7 +833,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.create_chain_files.output?.let { Path.of(it) } val args = buildList { - add("create-chain-files") add("--work-dir=${workDir}") add("--maf-input=${mafInput}") if (config.create_chain_files.jobs != null) { @@ -900,15 +843,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("create-chain-files failed with exit code $exitCode") - } + CreateChainFiles().parse(args) // Get output directory (use custom or default) chainOutputDir = customOutput ?: workDir.resolve("output").resolve("07_chain_results") @@ -957,7 +892,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.convert_coordinates.output?.let { Path.of(it) } val args = buildList { - add("convert-coordinates") add("--work-dir=${workDir}") add("--assembly-list=${config.convert_coordinates.assembly_list}") add("--chain-dir=${chainInput}") @@ -969,15 +903,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("convert-coordinates failed with exit code $exitCode") - } + ConvertCoordinates().parse(args) // Get output directory (use custom or default) coordinatesOutputDir = customOutput ?: workDir.resolve("output").resolve("08_coordinates_results") @@ -1023,7 +949,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.generate_recombined_sequences.output?.let { Path.of(it) } val args = buildList { - add("generate-recombined-sequences") add("--work-dir=${workDir}") add("--assembly-list=${config.generate_recombined_sequences.assembly_list}") add("--chromosome-list=${config.generate_recombined_sequences.chromosome_list}") @@ -1034,15 +959,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("generate-recombined-sequences failed with exit code $exitCode") - } + GenerateRecombinedSequences().parse(args) // Get output directory (use custom or default) val outputBase = customOutput ?: workDir.resolve("output").resolve("09_recombined_sequences") @@ -1086,7 +1003,6 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val customOutput = config.format_recombined_fastas.output?.let { Path.of(it) } val args = buildList { - add("format-recombined-fastas") add("--work-dir=${workDir}") add("--fasta-input=${fastaInput}") if (config.format_recombined_fastas.line_width != null) { @@ -1100,15 +1016,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } - val exitCode = ProcessRunner.runCommand( - "./gradlew", "run", "--args=${args.joinToString(" ")}", - workingDir = File("."), - logger = logger - ) - - if (exitCode != 0) { - throw RuntimeException("format-recombined-fastas failed with exit code $exitCode") - } + FormatRecombinedFastas().parse(args) // Get output directory (use custom or default) formattedFastasDir = customOutput ?: workDir.resolve("output").resolve("10_formatted_fastas") From 254053645dc62cec70ab951a832c831a45526ed1 Mon Sep 17 00:00:00 2001 From: Brandon Date: Tue, 16 Dec 2025 16:01:00 -0600 Subject: [PATCH 2/8] Update MafToGvcf parameters --- pipeline_config.example.yaml | 10 +- .../net/maizegenetics/commands/Orchestrate.kt | 113 ++++++++++++------ .../maizegenetics/utils/ValidationUtils.kt | 2 +- 3 files changed, 84 insertions(+), 41 deletions(-) diff --git a/pipeline_config.example.yaml b/pipeline_config.example.yaml index 87d81a2..50068be 100644 --- a/pipeline_config.example.yaml +++ b/pipeline_config.example.yaml @@ -79,9 +79,13 @@ align_assemblies: # Converts MAF files from align_assemblies to compressed GVCF format # Automatically uses ref_fasta from align_assemblies and MAF output paths maf_to_gvcf: - sample_name: "optional_sample_name" # Optional: Override sample name in GVCF - # input: "/custom/maf/files.txt" # Optional: Custom MAF input (file, directory, or text list) - # output: "/custom/gvcf/output/" # Optional: Custom output directory + # reference_file: "path/to/reference.fa" # Optional: Reference FASTA file + # Uses align_assemblies.ref_fasta if not specified + # maf_file: "/custom/maf/files.txt" # Optional: MAF file, directory, or text list + # Uses step 1 MAF outputs if not specified + # output_file: "sample.g.vcf.gz" # Optional: Output GVCF filename (auto-generated if not specified) + # sample_name: "optional_sample_name" # Optional: Override sample name in GVCF + # output_dir: "/custom/gvcf/output/" # Optional: Custom output directory # Step 3: Downsample GVCF # Downsamples GVCF files at specified rates per chromosome diff --git a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt index 6b6ae43..a7ab3f8 100644 --- a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt +++ b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt @@ -42,9 +42,11 @@ data class AlignAssembliesConfig( ) data class MafToGvcfConfig( - val sample_name: String? = null, - val input: String? = null, // Custom MAF file/directory path - val output: String? = null // Custom GVCF output directory + val reference_file: String? = null, // Optional: Reference FASTA (uses align_assemblies.ref_fasta if not specified) + val maf_file: String? = null, // Optional: MAF file/directory/list (uses step 1 output if not specified) + val output_file: String? = null, // Optional: Output GVCF file name + val sample_name: String? = null, // Optional: Sample name for GVCF + val output_dir: String? = null // Optional: Custom GVCF output directory ) data class DownsampleGvcfConfig( @@ -210,9 +212,11 @@ class Orchestrate : CliktCommand(name = "orchestrate") { val mafToGvcfMap = configMap["maf_to_gvcf"] as? Map val mafToGvcf = mafToGvcfMap?.let { MafToGvcfConfig( + reference_file = it["reference_file"] as? String, + maf_file = it["maf_file"] as? String, + output_file = it["output_file"] as? String, sample_name = it["sample_name"] as? String, - input = it["input"] as? String, - output = it["output"] as? String + output_dir = it["output_dir"] as? String ) } @@ -339,8 +343,8 @@ class Orchestrate : CliktCommand(name = "orchestrate") { // Parse configuration val config = parseYamlConfig(configFile) - // Determine working directory - val workDir = Path.of(config.work_dir ?: Constants.DEFAULT_WORK_DIR) + // Determine working directory and resolve to absolute path for consistency + val workDir = Path.of(config.work_dir ?: Constants.DEFAULT_WORK_DIR).toAbsolutePath().normalize() // Auto-detect and run setup-environment if needed logger.info("Validating environment setup...") @@ -393,22 +397,30 @@ class Orchestrate : CliktCommand(name = "orchestrate") { logger.info("STEP 1: Align Assemblies") logger.info("=".repeat(80)) - refFasta = Path.of(config.align_assemblies.ref_fasta) - refGff = Path.of(config.align_assemblies.ref_gff) + // Resolve all paths to absolute paths for consistency + refFasta = Path.of(config.align_assemblies.ref_fasta).toAbsolutePath().normalize() + refGff = Path.of(config.align_assemblies.ref_gff).toAbsolutePath().normalize() + val queryFasta = Path.of(config.align_assemblies.query_fasta).toAbsolutePath().normalize() - // Determine output directory (custom or default) - val customOutput = config.align_assemblies.output?.let { Path.of(it) } + // Determine output directory (custom or default) - also resolve to absolute path + val customOutput = config.align_assemblies.output?.let { + Path.of(it).toAbsolutePath().normalize() + } + + logger.info("Reference GFF: $refGff") + logger.info("Reference FASTA: $refFasta") + logger.info("Query FASTA: $queryFasta") val args = buildList { - add("--work-dir=${workDir}") - add("--ref-gff=${config.align_assemblies.ref_gff}") - add("--ref-fasta=${config.align_assemblies.ref_fasta}") - add("--query-fasta=${config.align_assemblies.query_fasta}") + add("--work-dir=$workDir") + add("--ref-gff=$refGff") + add("--ref-fasta=$refFasta") + add("--query-fasta=$queryFasta") if (config.align_assemblies.threads != null) { add("--threads=${config.align_assemblies.threads}") } if (customOutput != null) { - add("--output-dir=${customOutput}") + add("--output-dir=$customOutput") } } @@ -416,7 +428,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { // Get output path (use custom or default) val outputBase = customOutput ?: workDir.resolve("output").resolve("01_anchorwave_results") - mafFilePaths = outputBase.resolve("maf_file_paths.txt") + mafFilePaths = outputBase.toAbsolutePath().normalize().resolve("maf_file_paths.txt") if (!mafFilePaths.exists()) { throw RuntimeException("Expected MAF paths file not found: $mafFilePaths") @@ -429,13 +441,16 @@ class Orchestrate : CliktCommand(name = "orchestrate") { if (config.align_assemblies != null) { logger.info("Skipping align-assemblies (not in run_steps)") - // Try to use outputs from previous run - refFasta = Path.of(config.align_assemblies.ref_fasta) - refGff = Path.of(config.align_assemblies.ref_gff) + // Try to use outputs from previous run - resolve to absolute paths + refFasta = Path.of(config.align_assemblies.ref_fasta).toAbsolutePath().normalize() + refGff = Path.of(config.align_assemblies.ref_gff).toAbsolutePath().normalize() // Check custom output location first, then default - val customOutput = config.align_assemblies.output?.let { Path.of(it) } - val outputBase = customOutput ?: workDir.resolve("output").resolve("01_anchorwave_results") + val customOutput = config.align_assemblies.output?.let { + Path.of(it).toAbsolutePath().normalize() + } + val outputBase = (customOutput ?: workDir.resolve("output").resolve("01_anchorwave_results")) + .toAbsolutePath().normalize() val previousMafPaths = outputBase.resolve("maf_file_paths.txt") if (previousMafPaths.exists()) { @@ -456,34 +471,55 @@ class Orchestrate : CliktCommand(name = "orchestrate") { logger.info("STEP 2: MAF to GVCF Conversion") logger.info("=".repeat(80)) - // Determine input (custom or from previous step) - val mafInput = config.maf_to_gvcf.input?.let { Path.of(it) } ?: mafFilePaths + // Determine reference file (custom or from step 1) - resolve to absolute path + val step2RefFasta = config.maf_to_gvcf.reference_file?.let { + Path.of(it).toAbsolutePath().normalize() + } ?: refFasta + if (step2RefFasta == null) { + throw RuntimeException("Cannot run maf-to-gvcf: reference FASTA not available (specify 'reference_file' in config or run align-assemblies first)") + } + + // Determine MAF input (custom or from step 1) - resolve to absolute path + val mafInput = config.maf_to_gvcf.maf_file?.let { + Path.of(it).toAbsolutePath().normalize() + } ?: mafFilePaths if (mafInput == null) { - throw RuntimeException("Cannot run maf-to-gvcf: no MAF input available (specify 'input' in config or run align-assemblies first)") + throw RuntimeException("Cannot run maf-to-gvcf: no MAF input available (specify 'maf_file' in config or run align-assemblies first)") } - if (refFasta == null) { - throw RuntimeException("Cannot run maf-to-gvcf: reference FASTA not available") + + // Determine output directory (custom or default) - resolve to absolute path + val customOutputDir = config.maf_to_gvcf.output_dir?.let { + Path.of(it).toAbsolutePath().normalize() } - // Determine output directory (custom or default) - val customOutput = config.maf_to_gvcf.output?.let { Path.of(it) } + // Determine output file if specified - resolve to absolute path + val outputFile = config.maf_to_gvcf.output_file?.let { + Path.of(it).toAbsolutePath().normalize() + } + + logger.info("Reference FASTA: $step2RefFasta") + logger.info("MAF input: $mafInput") val args = buildList { - add("--work-dir=${workDir}") - add("--reference-file=${refFasta}") - add("--maf-file=${mafInput}") + add("--work-dir=$workDir") + add("--reference-file=$step2RefFasta") + add("--maf-file=$mafInput") + if (outputFile != null) { + add("--output-file=$outputFile") + } if (config.maf_to_gvcf.sample_name != null) { add("--sample-name=${config.maf_to_gvcf.sample_name}") } - if (customOutput != null) { - add("--output-dir=${customOutput}") + if (customOutputDir != null) { + add("--output-dir=$customOutputDir") } } MafToGvcf().parse(args) // Get output directory (use custom or default) - gvcfOutputDir = customOutput ?: workDir.resolve("output").resolve("02_gvcf_results") + gvcfOutputDir = (customOutputDir ?: workDir.resolve("output").resolve("02_gvcf_results")) + .toAbsolutePath().normalize() if (!gvcfOutputDir.exists()) { throw RuntimeException("Expected GVCF output directory not found: $gvcfOutputDir") @@ -497,8 +533,11 @@ class Orchestrate : CliktCommand(name = "orchestrate") { logger.info("Skipping maf-to-gvcf (not in run_steps)") // Check custom output location first, then default - val customOutput = config.maf_to_gvcf.output?.let { Path.of(it) } - val previousGvcfDir = customOutput ?: workDir.resolve("output").resolve("02_gvcf_results") + val customOutputDir = config.maf_to_gvcf.output_dir?.let { + Path.of(it).toAbsolutePath().normalize() + } + val previousGvcfDir = (customOutputDir ?: workDir.resolve("output").resolve("02_gvcf_results")) + .toAbsolutePath().normalize() if (previousGvcfDir.exists()) { gvcfOutputDir = previousGvcfDir logger.info("Using previous maf-to-gvcf outputs: $gvcfOutputDir") diff --git a/src/main/kotlin/net/maizegenetics/utils/ValidationUtils.kt b/src/main/kotlin/net/maizegenetics/utils/ValidationUtils.kt index 196664e..990c566 100644 --- a/src/main/kotlin/net/maizegenetics/utils/ValidationUtils.kt +++ b/src/main/kotlin/net/maizegenetics/utils/ValidationUtils.kt @@ -81,7 +81,7 @@ object ValidationUtils { */ fun validateBiokotlinSetup(workDir: Path, logger: Logger): Path { validateWorkingDirectory(workDir, logger) - val bioktBinary = resolveBinaryPath(workDir, Constants.BIOKOTLIN_TOOLS_DIR, "biokotlin") + val bioktBinary = resolveBinaryPath(workDir, Constants.BIOKOTLIN_TOOLS_DIR, "biokotlin-tools") validateBinaryExists(bioktBinary, "biokotlin-tools", logger) return bioktBinary } From 4aee751cfaa910a469fc25aa47e18a7b7e08d642 Mon Sep 17 00:00:00 2001 From: Brandon Date: Tue, 16 Dec 2025 16:11:07 -0600 Subject: [PATCH 3/8] Fix commands with fully optional parameters. --- .../net/maizegenetics/commands/Orchestrate.kt | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt index a7ab3f8..7ca8b71 100644 --- a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt +++ b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt @@ -207,58 +207,58 @@ class Orchestrate : CliktCommand(name = "orchestrate") { ) } - // Parse maf_to_gvcf + // Parse maf_to_gvcf - check if key exists (even with empty/null value means "run with defaults") @Suppress("UNCHECKED_CAST") val mafToGvcfMap = configMap["maf_to_gvcf"] as? Map - val mafToGvcf = mafToGvcfMap?.let { + val mafToGvcf = if (configMap.containsKey("maf_to_gvcf")) { MafToGvcfConfig( - reference_file = it["reference_file"] as? String, - maf_file = it["maf_file"] as? String, - output_file = it["output_file"] as? String, - sample_name = it["sample_name"] as? String, - output_dir = it["output_dir"] as? String + reference_file = mafToGvcfMap?.get("reference_file") as? String, + maf_file = mafToGvcfMap?.get("maf_file") as? String, + output_file = mafToGvcfMap?.get("output_file") as? String, + sample_name = mafToGvcfMap?.get("sample_name") as? String, + output_dir = mafToGvcfMap?.get("output_dir") as? String ) - } + } else null - // Parse downsample_gvcf + // Parse downsample_gvcf - check if key exists (even with empty/null value means "run with defaults") @Suppress("UNCHECKED_CAST") val downsampleGvcfMap = configMap["downsample_gvcf"] as? Map - val downsampleGvcf = downsampleGvcfMap?.let { + val downsampleGvcf = if (configMap.containsKey("downsample_gvcf")) { DownsampleGvcfConfig( - ignore_contig = it["ignore_contig"] as? String, - rates = it["rates"] as? String, - seed = it["seed"] as? Int, - keep_ref = it["keep_ref"] as? Boolean, - min_ref_block_size = it["min_ref_block_size"] as? Int, - input = it["input"] as? String, - output = it["output"] as? String + ignore_contig = downsampleGvcfMap?.get("ignore_contig") as? String, + rates = downsampleGvcfMap?.get("rates") as? String, + seed = downsampleGvcfMap?.get("seed") as? Int, + keep_ref = downsampleGvcfMap?.get("keep_ref") as? Boolean, + min_ref_block_size = downsampleGvcfMap?.get("min_ref_block_size") as? Int, + input = downsampleGvcfMap?.get("input") as? String, + output = downsampleGvcfMap?.get("output") as? String ) - } + } else null - // Parse convert_to_fasta + // Parse convert_to_fasta - check if key exists (even with empty/null value means "run with defaults") @Suppress("UNCHECKED_CAST") val convertToFastaMap = configMap["convert_to_fasta"] as? Map - val convertToFasta = convertToFastaMap?.let { + val convertToFasta = if (configMap.containsKey("convert_to_fasta")) { ConvertToFastaConfig( - missing_records_as = it["missing_records_as"] as? String, - missing_genotype_as = it["missing_genotype_as"] as? String, - input = it["input"] as? String, - output = it["output"] as? String + missing_records_as = convertToFastaMap?.get("missing_records_as") as? String, + missing_genotype_as = convertToFastaMap?.get("missing_genotype_as") as? String, + input = convertToFastaMap?.get("input") as? String, + output = convertToFastaMap?.get("output") as? String ) - } + } else null - // Parse align_mutated_assemblies + // Parse align_mutated_assemblies - check if key exists (even with empty/null value means "run with defaults") @Suppress("UNCHECKED_CAST") val alignMutatedAssembliesMap = configMap["align_mutated_assemblies"] as? Map - val alignMutatedAssemblies = alignMutatedAssembliesMap?.let { + val alignMutatedAssemblies = if (configMap.containsKey("align_mutated_assemblies")) { AlignMutatedAssembliesConfig( - ref_gff = it["ref_gff"] as? String, - ref_fasta = it["ref_fasta"] as? String, - fasta_input = it["fasta_input"] as? String, - threads = it["threads"] as? Int, - output = it["output"] as? String + ref_gff = alignMutatedAssembliesMap?.get("ref_gff") as? String, + ref_fasta = alignMutatedAssembliesMap?.get("ref_fasta") as? String, + fasta_input = alignMutatedAssembliesMap?.get("fasta_input") as? String, + threads = alignMutatedAssembliesMap?.get("threads") as? Int, + output = alignMutatedAssembliesMap?.get("output") as? String ) - } + } else null // Parse pick_crossovers @Suppress("UNCHECKED_CAST") @@ -271,16 +271,16 @@ class Orchestrate : CliktCommand(name = "orchestrate") { ) } - // Parse create_chain_files + // Parse create_chain_files - check if key exists (even with empty/null value means "run with defaults") @Suppress("UNCHECKED_CAST") val createChainFilesMap = configMap["create_chain_files"] as? Map - val createChainFiles = createChainFilesMap?.let { + val createChainFiles = if (configMap.containsKey("create_chain_files")) { CreateChainFilesConfig( - jobs = it["jobs"] as? Int, - input = it["input"] as? String, - output = it["output"] as? String + jobs = createChainFilesMap?.get("jobs") as? Int, + input = createChainFilesMap?.get("input") as? String, + output = createChainFilesMap?.get("output") as? String ) - } + } else null // Parse convert_coordinates @Suppress("UNCHECKED_CAST") @@ -307,17 +307,17 @@ class Orchestrate : CliktCommand(name = "orchestrate") { ) } - // Parse format_recombined_fastas + // Parse format_recombined_fastas - check if key exists (even with empty/null value means "run with defaults") @Suppress("UNCHECKED_CAST") val formatRecombinedFastasMap = configMap["format_recombined_fastas"] as? Map - val formatRecombinedFastas = formatRecombinedFastasMap?.let { + val formatRecombinedFastas = if (configMap.containsKey("format_recombined_fastas")) { FormatRecombinedFastasConfig( - line_width = it["line_width"] as? Int, - threads = it["threads"] as? Int, - input = it["input"] as? String, - output = it["output"] as? String + line_width = formatRecombinedFastasMap?.get("line_width") as? Int, + threads = formatRecombinedFastasMap?.get("threads") as? Int, + input = formatRecombinedFastasMap?.get("input") as? String, + output = formatRecombinedFastasMap?.get("output") as? String ) - } + } else null return PipelineConfig( work_dir = workDir, From 5960e7b6b6c71f44a2e8aa1fd167882d954e5d74 Mon Sep 17 00:00:00 2001 From: Brandon Date: Tue, 16 Dec 2025 16:28:21 -0600 Subject: [PATCH 4/8] Fix biokotlin-tools gzip naming extension --- .../net/maizegenetics/commands/MafToGvcf.kt | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/MafToGvcf.kt b/src/main/kotlin/net/maizegenetics/commands/MafToGvcf.kt index 1a38d6f..7173ef3 100644 --- a/src/main/kotlin/net/maizegenetics/commands/MafToGvcf.kt +++ b/src/main/kotlin/net/maizegenetics/commands/MafToGvcf.kt @@ -143,15 +143,29 @@ class MafToGvcf : CliktCommand(name = "maf-to-gvcf") { logger.info("Sample name: $finalSampleName") // Determine output file name - val outputFileName = if (outputFile != null && isSingleMaf) { - outputFile!!.fileName + // Note: biokotlin-tools automatically compresses and adds .gz extension + val outputFileName: Path + val expectedOutputPath: Path + + if (outputFile != null && isSingleMaf) { + val userFileName = outputFile!!.fileName.toString() + // If user specified .gz extension, strip it since biokotlin-tools adds it + if (userFileName.endsWith(".gz")) { + outputFileName = Path.of(userFileName.removeSuffix(".gz")) + expectedOutputPath = outputDir.resolve(userFileName) + } else { + outputFileName = outputFile!!.fileName + expectedOutputPath = outputDir.resolve("${userFileName}.gz") + } } else { - // Auto-generate output filename based on MAF filename (compressed) - Path.of("${mafBaseName}.g.vcf.gz") + // Auto-generate output filename based on MAF filename + // biokotlin-tools will add .gz when compressing + outputFileName = Path.of("${mafBaseName}.g.vcf") + expectedOutputPath = outputDir.resolve("${mafBaseName}.g.vcf.gz") } val fullOutputPath = outputDir.resolve(outputFileName) - logger.info("Output file: $fullOutputPath") + logger.info("Output file: $expectedOutputPath") // Run biokotlin-tools maf-to-gvcf-converter through pixi to use Java 21 logger.info("Running biokotlin-tools maf-to-gvcf-converter") @@ -173,7 +187,7 @@ class MafToGvcf : CliktCommand(name = "maf-to-gvcf") { logger.info("Conversion completed for: ${mafFile.name}") - // Return the GVCF file path - return fullOutputPath + // Return the GVCF file path (with .gz extension added by biokotlin-tools) + return expectedOutputPath } } From 5550ee0be0d71eae38ac02662e66db6601283c49 Mon Sep 17 00:00:00 2001 From: Brandon Date: Tue, 16 Dec 2025 17:52:53 -0600 Subject: [PATCH 5/8] Fix appending issues --- pipeline_config.example.yaml | 2 +- .../net/maizegenetics/utils/LoggingUtils.kt | 21 +++++++++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pipeline_config.example.yaml b/pipeline_config.example.yaml index 50068be..f2ae5bb 100644 --- a/pipeline_config.example.yaml +++ b/pipeline_config.example.yaml @@ -91,7 +91,7 @@ maf_to_gvcf: # Downsamples GVCF files at specified rates per chromosome # Automatically uses GVCF output directory from maf_to_gvcf downsample_gvcf: - ignore_contig: "" # Optional: Comma-separated patterns to ignore + ignore_contig: "__NO_MATCH__" # Optional: Comma-separated patterns to ignore rates: "0.01,0.05,0.1,0.15,0.2" # Optional: Comma-separated downsampling rates seed: 42 # Optional: Random seed for reproducibility keep_ref: true # Optional: Keep reference blocks (true/false, default: true) diff --git a/src/main/kotlin/net/maizegenetics/utils/LoggingUtils.kt b/src/main/kotlin/net/maizegenetics/utils/LoggingUtils.kt index 0f3e613..fa73f88 100644 --- a/src/main/kotlin/net/maizegenetics/utils/LoggingUtils.kt +++ b/src/main/kotlin/net/maizegenetics/utils/LoggingUtils.kt @@ -14,6 +14,7 @@ import kotlin.io.path.exists object LoggingUtils { private const val LOG_PATTERN = "%d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n" + private const val FILE_APPENDER_PREFIX = "FileAppender_" /** * Sets up file logging for a command in the working directory @@ -33,15 +34,23 @@ object LoggingUtils { val config = context.configuration // Use a unique appender name based on the log file name - val appenderName = "FileAppender_${logFileName.replace(".", "_")}" + val appenderName = "${FILE_APPENDER_PREFIX}${logFileName.replace(".", "_")}" - // Check if appender already exists and remove it - val existingAppender: Appender? = config.getAppender(appenderName) - if (existingAppender != null) { - config.rootLogger.removeAppender(appenderName) + // Remove ALL existing file appenders (from previous steps) to prevent log bleeding + val appendersToRemove = config.appenders.keys + .filter { it.startsWith(FILE_APPENDER_PREFIX) } + .toList() + + for (appenderToRemove in appendersToRemove) { + config.rootLogger.removeAppender(appenderToRemove) val maizeLoggerConfig = config.getLoggerConfig("net.maizegenetics") if (maizeLoggerConfig != null) { - maizeLoggerConfig.removeAppender(appenderName) + maizeLoggerConfig.removeAppender(appenderToRemove) + } + // Stop the appender + val existingAppender: Appender? = config.getAppender(appenderToRemove) + if (existingAppender is FileAppender) { + existingAppender.stop() } } From 91180a71cd6d8bb4c1d446d4b935d8ac73532bc5 Mon Sep 17 00:00:00 2001 From: Brandon Date: Tue, 16 Dec 2025 18:03:43 -0600 Subject: [PATCH 6/8] Fix logging issues; fix stale temp directory removal --- .../maizegenetics/commands/ConvertToFasta.kt | 4 ++-- .../maizegenetics/commands/DownsampleGvcf.kt | 3 ++- .../net/maizegenetics/commands/Orchestrate.kt | 20 +++++++++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/net/maizegenetics/commands/ConvertToFasta.kt b/src/main/kotlin/net/maizegenetics/commands/ConvertToFasta.kt index 72f5a4c..dc4d7a9 100644 --- a/src/main/kotlin/net/maizegenetics/commands/ConvertToFasta.kt +++ b/src/main/kotlin/net/maizegenetics/commands/ConvertToFasta.kt @@ -211,8 +211,8 @@ class ConvertToFasta : CliktCommand(name = "convert-to-fasta") { "FASTA file" ) - // Clean up temp directory - if (tempDir.exists() && preparedFiles.any { it.parent == tempDir }) { + // Clean up temp directory - always clean up if it exists + if (tempDir.exists()) { logger.info("Cleaning up temporary uncompressed files") try { tempDir.deleteRecursively() diff --git a/src/main/kotlin/net/maizegenetics/commands/DownsampleGvcf.kt b/src/main/kotlin/net/maizegenetics/commands/DownsampleGvcf.kt index 5728e5d..a06c61f 100644 --- a/src/main/kotlin/net/maizegenetics/commands/DownsampleGvcf.kt +++ b/src/main/kotlin/net/maizegenetics/commands/DownsampleGvcf.kt @@ -233,7 +233,8 @@ class DownsampleGvcf : CliktCommand(name = "downsample-gvcf") { logger.info("Output directory: $outputDir") // Clean up temp directory if not keeping uncompressed files - if (!keepUncompressed && tempDir.exists() && mlimputeInputDir == tempDir) { + // Always clean up if temp directory exists and was used or created + if (!keepUncompressed && tempDir.exists()) { logger.info("Cleaning up temporary uncompressed files") try { tempDir.deleteRecursively() diff --git a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt index 7ca8b71..d66b585 100644 --- a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt +++ b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt @@ -178,6 +178,16 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } } + /** + * Restores the orchestrator's log file after a step command has run. + * Each step command sets up its own log file, so we need to restore + * the orchestrator's log file to ensure orchestrator messages go to + * the correct log file. + */ + private fun restoreOrchestratorLogging(workDir: Path) { + LoggingUtils.setupFileLogging(workDir, LOG_FILE_NAME, logger) + } + private fun parseYamlConfig(configPath: Path): PipelineConfig { logger.info("Parsing configuration file: $configPath") @@ -425,6 +435,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } AlignAssemblies().parse(args) + restoreOrchestratorLogging(workDir) // Get output path (use custom or default) val outputBase = customOutput ?: workDir.resolve("output").resolve("01_anchorwave_results") @@ -516,6 +527,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } MafToGvcf().parse(args) + restoreOrchestratorLogging(workDir) // Get output directory (use custom or default) gvcfOutputDir = (customOutputDir ?: workDir.resolve("output").resolve("02_gvcf_results")) @@ -589,6 +601,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } DownsampleGvcf().parse(args) + restoreOrchestratorLogging(workDir) // Get output directory (use custom or default) downsampledGvcfOutputDir = customOutput ?: workDir.resolve("output").resolve("03_downsample_results") @@ -653,6 +666,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } ConvertToFasta().parse(args) + restoreOrchestratorLogging(workDir) // Get output directory for downstream use (use custom or default) fastaOutputDir = customOutput ?: workDir.resolve("output").resolve("04_fasta_results") @@ -765,6 +779,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } AlignMutatedAssemblies().parse(args) + restoreOrchestratorLogging(workDir) // Save the mutated reference FASTA for use in step 6 mutatedRefFasta = step5RefFasta @@ -827,6 +842,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } PickCrossovers().parse(args) + restoreOrchestratorLogging(workDir) // Get output directory (use custom or default) refkeyOutputDir = customOutput ?: workDir.resolve("output").resolve("06_crossovers_results") @@ -883,6 +899,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } CreateChainFiles().parse(args) + restoreOrchestratorLogging(workDir) // Get output directory (use custom or default) chainOutputDir = customOutput ?: workDir.resolve("output").resolve("07_chain_results") @@ -943,6 +960,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } ConvertCoordinates().parse(args) + restoreOrchestratorLogging(workDir) // Get output directory (use custom or default) coordinatesOutputDir = customOutput ?: workDir.resolve("output").resolve("08_coordinates_results") @@ -999,6 +1017,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } GenerateRecombinedSequences().parse(args) + restoreOrchestratorLogging(workDir) // Get output directory (use custom or default) val outputBase = customOutput ?: workDir.resolve("output").resolve("09_recombined_sequences") @@ -1056,6 +1075,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { } FormatRecombinedFastas().parse(args) + restoreOrchestratorLogging(workDir) // Get output directory (use custom or default) formattedFastasDir = customOutput ?: workDir.resolve("output").resolve("10_formatted_fastas") From 9a0d8dccf9450f01c222a1aff824d149b0da0257 Mon Sep 17 00:00:00 2001 From: Brandon Date: Tue, 16 Dec 2025 18:21:48 -0600 Subject: [PATCH 7/8] Add missing parameter from "ConvertToFasta" --- pipeline_config.example.yaml | 3 ++- .../net/maizegenetics/commands/ConvertToFasta.kt | 11 +++++++++++ .../kotlin/net/maizegenetics/commands/Orchestrate.kt | 5 +++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pipeline_config.example.yaml b/pipeline_config.example.yaml index f2ae5bb..ac6ff07 100644 --- a/pipeline_config.example.yaml +++ b/pipeline_config.example.yaml @@ -91,7 +91,7 @@ maf_to_gvcf: # Downsamples GVCF files at specified rates per chromosome # Automatically uses GVCF output directory from maf_to_gvcf downsample_gvcf: - ignore_contig: "__NO_MATCH__" # Optional: Comma-separated patterns to ignore + ignore_contig: "__NO_MATCH__" # Optional: Comma-separated patterns to ignore (currently needed) rates: "0.01,0.05,0.1,0.15,0.2" # Optional: Comma-separated downsampling rates seed: 42 # Optional: Random seed for reproducibility keep_ref: true # Optional: Keep reference blocks (true/false, default: true) @@ -105,6 +105,7 @@ downsample_gvcf: convert_to_fasta: missing_records_as: "asRef" # Optional: Missing records (asN, asRef, asNone) missing_genotype_as: "asN" # Optional: Missing genotypes (asN, asRef, asNone) + ignore_contig: "__NO_MATCH_" # Optional: Comma-separated patterns to skip (contigs matching will be ignored) (currently needed) # input: "/custom/gvcf/input/" # Optional: Custom GVCF input (file, directory, or text list) # output: "/custom/fasta/output/" # Optional: Custom output directory diff --git a/src/main/kotlin/net/maizegenetics/commands/ConvertToFasta.kt b/src/main/kotlin/net/maizegenetics/commands/ConvertToFasta.kt index dc4d7a9..203bcb4 100644 --- a/src/main/kotlin/net/maizegenetics/commands/ConvertToFasta.kt +++ b/src/main/kotlin/net/maizegenetics/commands/ConvertToFasta.kt @@ -61,6 +61,11 @@ class ConvertToFasta : CliktCommand(name = "convert-to-fasta") { ).choice("asN", "asRef", "asNone", ignoreCase = true) .default("asN") + private val ignoreContig by option( + "--ignore-contig", + help = "Comma-separated list of string patterns to ignore (contigs containing these patterns will be skipped)" + ).default("") + private val outputDirOption by option( "--output-dir", "-o", help = "Custom output directory (default: work_dir/output/04_fasta_results)" @@ -153,6 +158,9 @@ class ConvertToFasta : CliktCommand(name = "convert-to-fasta") { logger.info("Reference FASTA: $refFasta") logger.info("Missing records as: $missingRecordsAs") logger.info("Missing genotype as: $missingGenotypeAs") + if (ignoreContig.isNotEmpty()) { + logger.info("Ignore contig patterns: $ignoreContig") + } // Collect GVCF files val gvcfFiles = collectGvcfFiles() @@ -246,6 +254,9 @@ class ConvertToFasta : CliktCommand(name = "convert-to-fasta") { add("--fasta-file=${refFasta.toAbsolutePath()}") add("--missing-records-as=$missingRecordsAs") add("--missing-genotype-as=$missingGenotypeAs") + if (ignoreContig.isNotEmpty()) { + add("--ignore-contig=$ignoreContig") + } } // Run MLImpute ConvertToFasta command diff --git a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt index d66b585..5c30892 100644 --- a/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt +++ b/src/main/kotlin/net/maizegenetics/commands/Orchestrate.kt @@ -62,6 +62,7 @@ data class DownsampleGvcfConfig( data class ConvertToFastaConfig( val missing_records_as: String? = null, val missing_genotype_as: String? = null, + val ignore_contig: String? = null, // Comma-separated list of string patterns to ignore val input: String? = null, // Custom GVCF input file/directory val output: String? = null // Custom FASTA output directory ) @@ -252,6 +253,7 @@ class Orchestrate : CliktCommand(name = "orchestrate") { ConvertToFastaConfig( missing_records_as = convertToFastaMap?.get("missing_records_as") as? String, missing_genotype_as = convertToFastaMap?.get("missing_genotype_as") as? String, + ignore_contig = convertToFastaMap?.get("ignore_contig") as? String, input = convertToFastaMap?.get("input") as? String, output = convertToFastaMap?.get("output") as? String ) @@ -660,6 +662,9 @@ class Orchestrate : CliktCommand(name = "orchestrate") { if (config.convert_to_fasta.missing_genotype_as != null) { add("--missing-genotype-as=${config.convert_to_fasta.missing_genotype_as}") } + if (!config.convert_to_fasta.ignore_contig.isNullOrEmpty()) { + add("--ignore-contig=${config.convert_to_fasta.ignore_contig}") + } if (customOutput != null) { add("--output-dir=${customOutput}") } From 1505aebc0c2b6295d6456fe77dadfac5f2d58b6d Mon Sep 17 00:00:00 2001 From: Brandon Date: Wed, 17 Dec 2025 10:34:24 -0600 Subject: [PATCH 8/8] Fix syntax --- pipeline_config.example.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pipeline_config.example.yaml b/pipeline_config.example.yaml index ac6ff07..211c01d 100644 --- a/pipeline_config.example.yaml +++ b/pipeline_config.example.yaml @@ -105,7 +105,7 @@ downsample_gvcf: convert_to_fasta: missing_records_as: "asRef" # Optional: Missing records (asN, asRef, asNone) missing_genotype_as: "asN" # Optional: Missing genotypes (asN, asRef, asNone) - ignore_contig: "__NO_MATCH_" # Optional: Comma-separated patterns to skip (contigs matching will be ignored) (currently needed) + ignore_contig: "__NO_MATCH__" # Optional: Comma-separated patterns to skip (contigs matching will be ignored) (currently needed) # input: "/custom/gvcf/input/" # Optional: Custom GVCF input (file, directory, or text list) # output: "/custom/fasta/output/" # Optional: Custom output directory @@ -147,9 +147,9 @@ pick_crossovers: # Automatically uses MAF files from align_assemblies (step 1) # Uses maf-convert tool (automatically downloaded if missing) create_chain_files: - jobs: 8 # Optional: Number of parallel jobs (default: 8) - # input: "/custom/maf/input/" # Optional: Custom MAF input (file, directory, or text list) - # output: "/custom/chain/output/" # Optional: Custom output directory + jobs: 8 # Optional: Number of parallel jobs (default: 8) + # input: "/custom/maf/input/" # Optional: Custom MAF input (file, directory, or text list) + # output: "/custom/chain/output/" # Optional: Custom output directory # Step 8: Convert Coordinates # Converts crossover breakpoints from reference coordinates to assembly coordinates