Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ plugins {
}

group = "net.maizegenetics"
version = "0.2.6"
version = "0.2.7"

repositories {
mavenCentral()
Expand Down
278 changes: 158 additions & 120 deletions pipeline_config.example.yaml

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ import kotlin.system.exitProcess

class AlignMutatedAssemblies : CliktCommand(name = "align-mutated-assemblies") {
companion object {
private const val LOG_FILE_NAME = "05_align_mutated_assemblies.log"
private const val MUTATED_ALIGNMENT_RESULTS_DIR = "05_mutated_alignment_results"
private const val LOG_FILE_NAME = "10_align_mutated_assemblies.log"
private const val MUTATED_ALIGNMENT_RESULTS_DIR = "10_mutated_alignment_results"
private const val MAF_PATHS_FILE = "maf_file_paths.txt"

// minimap2 parameters
Expand Down Expand Up @@ -71,7 +71,7 @@ class AlignMutatedAssemblies : CliktCommand(name = "align-mutated-assemblies") {

private val outputDir by option(
"--output-dir", "-o",
help = "Custom output directory (default: work_dir/output/05_mutated_alignment_results)"
help = "Custom output directory (default: work_dir/output/10_mutated_alignment_results)"
).path(mustExist = false, canBeFile = false, canBeDir = true)

private fun collectFastaFiles(): List<Path> {
Expand Down
22 changes: 12 additions & 10 deletions src/main/kotlin/net/maizegenetics/commands/ConvertCoordinates.kt
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ import kotlin.system.exitProcess

class ConvertCoordinates : CliktCommand(name = "convert-coordinates") {
companion object {
private const val LOG_FILE_NAME = "08_convert_coordinates.log"
private const val COORDS_RESULTS_DIR = "08_coordinates_results"
private const val LOG_FILE_NAME = "07_convert_coordinates.log"
private const val COORDS_RESULTS_DIR = "07_coordinates_results"
private const val KEY_PATHS_FILE = "key_file_paths.txt"
private const val FOUNDER_KEY_PATHS_FILE = "founder_key_file_paths.txt"
private const val PYTHON_SCRIPT = "src/python/cross/convert_coords.py"
private const val DEFAULT_REFKEY_DIR = "06_crossovers_results"
private const val DEFAULT_REFKEY_DIR = "05_crossovers_results"
}

private val logger: Logger = LogManager.getLogger(ConvertCoordinates::class.java)
Expand Down Expand Up @@ -53,7 +53,7 @@ class ConvertCoordinates : CliktCommand(name = "convert-coordinates") {

private val outputDirOption by option(
"--output-dir", "-o",
help = "Custom output directory (default: work_dir/output/08_coordinates_results)"
help = "Custom output directory (default: work_dir/output/07_coordinates_results)"
).path(mustExist = false, canBeFile = false, canBeDir = true)

override fun run() {
Expand Down Expand Up @@ -112,14 +112,16 @@ class ConvertCoordinates : CliktCommand(name = "convert-coordinates") {

// Run convert_coords.py
// Set PYTHONPATH so Python can find the 'python' package for imports
val pythonPath = mlimputeDir.resolve("src").toString()
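// Pass absolute paths: the child process runs with outputDir as its working directory,
// so relative paths would be resolved against outputDir.
// Run the command through `sh -c` so that PYTHONPATH is assigned inside pixi's environment.
// NOTE(review): each path is wrapped in single quotes when the shell command is built;
// a path that itself contains a single quote would break the command — confirm inputs.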
val pythonPath = mlimputeDir.resolve("src").toAbsolutePath().toString()
val scriptPath = pythonScript.toAbsolutePath().toString()
val assemblyListPath = assemblyList.toAbsolutePath().toString()
val chainDirPath = chainDir.toAbsolutePath().toString()
val shellCommand = "PYTHONPATH='$pythonPath' python '$scriptPath' --assembly-list '$assemblyListPath' --chain-dir '$chainDirPath'"
logger.info("Running convert_coords.py")
val exitCode = ProcessRunner.runCommand(
"env", "PYTHONPATH=$pythonPath",
"pixi", "run",
"python", pythonScript.toString(),
"--assembly-list", assemblyList.toString(),
"--chain-dir", chainDir.toString(),
"pixi", "run", "sh", "-c", shellCommand,
workingDir = outputDir.toFile(),
logger = logger
)
Expand Down
30 changes: 22 additions & 8 deletions src/main/kotlin/net/maizegenetics/commands/CreateChainFiles.kt
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ import kotlin.system.exitProcess

class CreateChainFiles : CliktCommand(name = "create-chain-files") {
companion object {
private const val LOG_FILE_NAME = "07_create_chain_files.log"
private const val CHAIN_RESULTS_DIR = "07_chain_results"
private const val LOG_FILE_NAME = "06_create_chain_files.log"
private const val CHAIN_RESULTS_DIR = "06_chain_results"
private const val CHAIN_PATHS_FILE = "chain_file_paths.txt"
private const val BASH_SCRIPT = "src/python/cross/create_chains.sh"
private const val DEFAULT_JOBS = 8
Expand Down Expand Up @@ -49,7 +49,7 @@ class CreateChainFiles : CliktCommand(name = "create-chain-files") {

private val outputDirOption by option(
"--output-dir", "-o",
help = "Custom output directory (default: work_dir/output/07_chain_results)"
help = "Custom output directory (default: work_dir/output/06_chain_results)"
).path(mustExist = false, canBeFile = false, canBeDir = true)

private fun collectMafFiles(): List<Path> {
Expand Down Expand Up @@ -128,20 +128,34 @@ class CreateChainFiles : CliktCommand(name = "create-chain-files") {
logger.info("Cleaning up temporary MAF directory")
tempMafDir.toFile().deleteRecursively()

// Collect generated chain files
// Collect generated chain files (all files with .chain extension)
val chainFiles = outputDir.listDirectoryEntries()
.filter { it.isRegularFile() && it.extension == "chain" }
.sorted()

if (chainFiles.isEmpty()) {
// Rename chain files to include "_subsampled" suffix if not already present
val renamedChainFiles = chainFiles.map { chainFile ->
val fileName = chainFile.nameWithoutExtension
if (!fileName.endsWith("_subsampled")) {
val newFileName = "${fileName}_subsampled.chain"
val newPath = chainFile.parent.resolve(newFileName)
chainFile.moveTo(newPath, overwrite = true)
logger.info("Renamed ${chainFile.fileName} to $newFileName")
newPath
} else {
chainFile
}
}.sorted()

if (renamedChainFiles.isEmpty()) {
logger.warn("No chain files generated")
} else {
logger.info("Generated ${chainFiles.size} chain file(s):")
chainFiles.forEach { logger.info(" $it") }
logger.info("Generated ${renamedChainFiles.size} chain file(s):")
renamedChainFiles.forEach { logger.info(" $it") }

// Write chain file paths to text file
FileUtils.writeFilePaths(
chainFiles,
renamedChainFiles,
outputDir.resolve(CHAIN_PATHS_FILE),
logger,
"Chain file"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ import kotlin.system.exitProcess

class FormatRecombinedFastas : CliktCommand(name = "format-recombined-fastas") {
companion object {
private const val LOG_FILE_NAME = "10_format_recombined_fastas.log"
private const val FORMATTED_RESULTS_DIR = "10_formatted_fastas"
private const val LOG_FILE_NAME = "09_format_recombined_fastas.log"
private const val FORMATTED_RESULTS_DIR = "09_formatted_fastas"
private const val FORMATTED_FASTA_PATHS_FILE = "formatted_fasta_paths.txt"
private const val DEFAULT_LINE_WIDTH = 60
private const val DEFAULT_THREADS = 8
private const val DEFAULT_INPUT_DIR = "09_recombined_sequences/recombinate_fastas"
private const val DEFAULT_INPUT_DIR = "08_recombined_sequences/recombinate_fastas"
}

private val logger: Logger = LogManager.getLogger(FormatRecombinedFastas::class.java)
Expand Down Expand Up @@ -53,7 +53,7 @@ class FormatRecombinedFastas : CliktCommand(name = "format-recombined-fastas") {

private val outputDirOption by option(
"--output-dir", "-o",
help = "Custom output directory (default: work_dir/output/10_formatted_fastas)"
help = "Custom output directory (default: work_dir/output/09_formatted_fastas)"
).path(mustExist = false, canBeFile = false, canBeDir = true)

private fun collectFastaFiles(): List<Path> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ import net.maizegenetics.utils.ProcessRunner
import net.maizegenetics.utils.ValidationUtils
import org.apache.logging.log4j.LogManager
import org.apache.logging.log4j.Logger
import java.nio.file.Files
import java.nio.file.Path
import kotlin.io.path.*
import kotlin.system.exitProcess

class GenerateRecombinedSequences : CliktCommand(name = "generate-recombined-sequences") {
companion object {
private const val LOG_FILE_NAME = "09_generate_recombined_sequences.log"
private const val RECOMBINED_RESULTS_DIR = "09_recombined_sequences"
private const val LOG_FILE_NAME = "08_generate_recombined_sequences.log"
private const val RECOMBINED_RESULTS_DIR = "08_recombined_sequences"
private const val RECOMBINED_FASTAS_DIR = "recombinate_fastas"
private const val FASTA_PATHS_FILE = "recombined_fasta_paths.txt"
private const val PYTHON_SCRIPT = "src/python/cross/write_fastas.py"
private const val DEFAULT_FOUNDER_KEY_DIR = "08_coordinates_results"
private const val DEFAULT_FOUNDER_KEY_DIR = "07_coordinates_results"
}

private val logger: Logger = LogManager.getLogger(GenerateRecombinedSequences::class.java)
Expand Down Expand Up @@ -59,7 +60,7 @@ class GenerateRecombinedSequences : CliktCommand(name = "generate-recombined-seq

private val outputDirOption by option(
"--output-dir", "-o",
help = "Custom output directory (default: work_dir/output/09_recombined_sequences)"
help = "Custom output directory (default: work_dir/output/08_recombined_sequences)"
).path(mustExist = false, canBeFile = false, canBeDir = true)

override fun run() {
Expand Down Expand Up @@ -119,17 +120,58 @@ class GenerateRecombinedSequences : CliktCommand(name = "generate-recombined-seq
}
}

// Create symlinks for FASTA files with different extensions
// The Python script expects .fa extension, but files might have .fasta or .fna
logger.info("Checking FASTA file extensions in assembly directory")
val assemblyNames = assemblyList.readLines()
.filter { it.isNotBlank() }
.mapNotNull { line ->
val parts = line.split("\t")
if (parts.size >= 2) parts[1].trim() else null
}

assemblyNames.forEach { name ->
val faFile = assemblyDir.resolve("$name.fa")
if (!faFile.exists()) {
// Look for alternative extensions
val alternatives = listOf("fasta", "fna")
for (ext in alternatives) {
val altFile = assemblyDir.resolve("$name.$ext")
if (altFile.exists()) {
// Create symlink: name.fa -> name.fasta (or .fna)
try {
Files.createSymbolicLink(faFile, altFile.fileName)
logger.debug("Created symlink: ${faFile.fileName} -> ${altFile.fileName}")

// Also create symlink for the FASTA index file if it exists
val altIndexFile = assemblyDir.resolve("$name.$ext.fai")
val faIndexFile = assemblyDir.resolve("$name.fa.fai")
if (altIndexFile.exists() && !faIndexFile.exists()) {
Files.createSymbolicLink(faIndexFile, altIndexFile.fileName)
logger.debug("Created symlink: ${faIndexFile.fileName} -> ${altIndexFile.fileName}")
}
} catch (e: Exception) {
logger.warn("Failed to create symlink for $name: ${e.message}")
}
break
}
}
}
}

// Run write_fastas.py
// Set PYTHONPATH so Python can find the 'python' package for imports
val pythonPath = mlimputeDir.resolve("src").toString()
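// Pass absolute paths: the child process runs with outputDir as its working directory,
// so relative paths would be resolved against outputDir.
// Run the command through `sh -c` so that PYTHONPATH is assigned inside pixi's environment.
// NOTE(review): each path is wrapped in single quotes when the shell command is built;
// a path that itself contains a single quote would break the command — confirm inputs.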
val pythonPath = mlimputeDir.resolve("src").toAbsolutePath().toString()
val scriptPath = pythonScript.toAbsolutePath().toString()
val assemblyListPath = assemblyList.toAbsolutePath().toString()
val chromosomeListPath = chromosomeList.toAbsolutePath().toString()
val assemblyDirPath = assemblyDir.toAbsolutePath().toString()
val shellCommand = "PYTHONPATH='$pythonPath' python '$scriptPath' --assembly-list '$assemblyListPath' --chromosome-list '$chromosomeListPath' --assembly-dir '$assemblyDirPath'"
logger.info("Running write_fastas.py")
val exitCode = ProcessRunner.runCommand(
"env", "PYTHONPATH=$pythonPath",
"pixi", "run",
"python", pythonScript.toString(),
"--assembly-list", assemblyList.toString(),
"--chromosome-list", chromosomeList.toString(),
"--assembly-dir", assemblyDir.toString(),
"pixi", "run", "sh", "-c", shellCommand,
workingDir = outputDir.toFile(),
logger = logger
)
Expand Down
Loading