Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions modules/nextflow/xorf/chunk/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CHUNKER — Splits genomic regions (BED/GTF/GFF) and sequences (2bit/FA/FA.GZ)
into chunks for parallel processing. Allows extending the extracted chunk by a given
number of upstream and downstream nucleotides. Additionally, it allows specifying
the number of chunks to be generated.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

process CHUNKER {
    tag "$meta.id:$meta.chr"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-chunk:latest'

    input:
    // meta is expected to carry `id` and `chr` (used by tag and prefix)
    tuple val(meta), path(regions)
    tuple val(meta1), path(sequence)
    val(chunk_size)

    output:
    // Both outputs are optional: `orf chunk --ignore-errors` may emit no
    // chunks for an empty or fully-invalid region set.
    tuple val(meta), path('tmp/*bed'), optional: true, emit: chunked_regions
    tuple val(meta), path('tmp/*fa'), optional: true, emit: chunked_sequences
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Flank sizes (nt) added upstream/downstream of each extracted chunk.
    def upstream = task.ext.upstream ?: 1000
    def downstream = task.ext.downstream ?: 1000
    def prefix = task.ext.prefix ?: meta.chr
    """
    orf chunk \\
        --regions $regions \\
        --sequence $sequence \\
        --chunks $chunk_size \\
        -u $upstream \\
        -d $downstream \\
        --prefix $prefix \\
        --ignore-errors

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-chunk: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // FIX: the original stub ran `touch tmp` (creating a regular *file*) and
    // then `touch tmp/*bed`, which fails: tmp is not a directory and the glob
    // cannot expand. Create the directory and concrete placeholder files
    // matching the output globs instead.
    def prefix = task.ext.prefix ?: meta.chr
    """
    mkdir -p tmp
    touch tmp/${prefix}.bed
    touch tmp/${prefix}.fa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-chunk: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
    END_VERSIONS
    """
}
53 changes: 53 additions & 0 deletions modules/nextflow/xorf/netstart2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NETSTART2 — Predicts translation initiation sites using neural networks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

process NETSTART2 {
    tag "$meta.id:$meta.name"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-net:latest'

    input:
    tuple val(meta), path(sequence)
    tuple val(meta1), path(bed)

    output:
    // NOTE(review): the emitted tuple carries meta1 (from the bed channel)
    // while the file pattern is keyed on meta.id (from the sequence channel);
    // confirm the two channels are meant to be paired this way.
    tuple val(meta1), path("${meta.id}*csv"), optional: true, emit: netstart
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    netstart2 \\
        -in $sequence \\
        -compute_device cpu \\
        -o chordata \\
        -out ${meta.id}_netstart \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        netstart2: \$(netstart2 --version 2>&1 | sed 's/.*Version: //')
    END_VERSIONS
    """

    stub:
    // FIX: `touch ${meta.id}*` creates a single file literally named
    // "<id>*" (the glob cannot expand in an empty work dir), which does not
    // match the "${meta.id}*csv" output pattern. Create a concrete CSV.
    """
    touch ${meta.id}_netstart.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        netstart2: \$(netstart2 --version 2>&1 | sed 's/.*Version: //')
    END_VERSIONS
    """
}
68 changes: 68 additions & 0 deletions modules/nextflow/xorf/rnasamba/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RNASAMBA — Classifies ORFs as coding or non-coding using RNAsamba machine learning
models through a Rust wrapper. Requires specifying the upstream and downstream
number of nucleotides extended from the incoming file.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

process RNASAMBA {
    tag "$meta.id:$meta.name"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-samba:latest'

    input:
    tuple val(meta), path(bed), path(sequence)
    tuple val(meta1), path(weights)

    output:
    tuple val(meta), path("${meta.id}/*tsv") , optional: true, emit: samba
    tuple val(meta), path("${meta.id}/*strip.fa") , optional: true, emit: fasta
    // Pass-through of the incoming BED so downstream steps keep it paired
    // with the classification results.
    tuple val(meta), path(bed) , optional: true, emit: bed
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Flank sizes (nt) must match the values used to extend the incoming
    // sequences (e.g. by CHUNKER) so the wrapper can strip them back off.
    def upstream = task.ext.upstream ?: 1000
    def downstream = task.ext.downstream ?: 1000
    """
    orf samba \\
        --fasta $sequence \\
        --outdir ${meta.id} \\
        --upstream-flank $upstream \\
        --downstream-flank $downstream \\
        --weights $weights \\
        $args

    mv ${meta.id}/samba/*tsv ${meta.id}/${meta.id}.${meta.name}.samba.tsv && rm -rf ${meta.id}/samba
    mv ${meta.name}.tmp.strip.fa ${meta.id}/${meta.id}.${meta.name}.strip.fa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-samba: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
        rnasamba: \$(rnasamba --version 2>&1 | tail -n 1 | sed 's/^rnasamba //')
    END_VERSIONS
    """

    stub:
    // FIX: the original stub touched "${meta.id}" as a regular *file* and
    // then tried to create files inside it via unexpandable globs, which
    // fails. Create the directory and placeholders that match the output
    // patterns produced by the script block above.
    """
    mkdir -p ${meta.id}
    touch ${meta.id}/${meta.id}.${meta.name}.samba.tsv
    touch ${meta.id}/${meta.id}.${meta.name}.strip.fa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-samba: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
        rnasamba: \$(rnasamba --version 2>&1 | tail -n 1 | sed 's/^rnasamba //')
    END_VERSIONS
    """
}
56 changes: 56 additions & 0 deletions modules/nextflow/xorf/transaid/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
TRANSAID — Predicts translation initiation sites using TransAID deep learning
models.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

process TRANSAID {
    tag "$meta.id:$meta.name"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-net:latest'

    input:
    tuple val(meta), path(sequence)
    tuple val(meta1), path(bed)

    output:
    tuple val(meta1), path("*csv") , optional: true, emit: transaid
    path "versions.yml" , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    transaid \\
        --input $sequence \\
        --gpu -1 \\
        --output ${meta.id}_transaid \\
        $args

    # NOTE(review): assumes transaid emits exactly one CSV — confirm;
    # `mv` would fail here if several were produced.
    mv *csv ${meta.id}.${meta.name}.transaid.csv
    # FIX: -f so the task does not die under bash -e when no .faa is written
    rm -f *.faa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        transaid: \$(transaid --version 2>&1 | sed 's/.*Version: //')
    END_VERSIONS
    """

    stub:
    // FIX: `touch ${meta.id}*` creates a literal file named "<id>*"; create
    // the concrete CSV the script block would produce so the "*csv" output
    // pattern matches.
    """
    touch ${meta.id}.${meta.name}.transaid.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        transaid: \$(transaid --version 2>&1 | sed 's/.*Version: //')
    END_VERSIONS
    """
}
61 changes: 61 additions & 0 deletions modules/nextflow/xorf/translationai/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
TRANSLATIONAI — Runs translational inference (TAI) on ORF predictions through a
Rust wrapper. Requires specifying the upstream and downstream number of nucleotides
extended from the incoming file.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// NOTE(review): the module path (translationai/) and header say
// TRANSLATIONAI but the process is named TRANSLATION; renaming would break
// existing `include` statements, so the mismatch is only flagged here.
process TRANSLATION {
    tag "$meta.id:$meta.name"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-tai:latest'

    input:
    tuple val(meta), path(bed), path(sequence)

    output:
    // bed and sequence are passed through alongside the TAI results so
    // downstream steps receive the full triplet.
    tuple val(meta), path(bed), path(sequence), path("${meta.id}/*result"), optional: true, emit: predictions
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Flank sizes (nt) must match the extension applied to the incoming file.
    def upstream = task.ext.upstream ?: 1000
    def downstream = task.ext.downstream ?: 1000
    """
    orf tai \\
        --fasta $sequence \\
        --bed $bed \\
        --outdir ${meta.id} \\
        -u $upstream \\
        -d $downstream

    mv ${meta.id}/tai/*result ${meta.id}/ && rm -rf ${meta.id}/tai

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-tai: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
        translationai: 0.0.1
    END_VERSIONS
    """

    stub:
    // FIX: the original stub touched "${meta.id}" as a regular *file* and
    // then used unexpandable globs beneath it, which fails. Create the
    // directory and a placeholder matching the "${meta.id}/*result" pattern.
    """
    mkdir -p ${meta.id}
    touch ${meta.id}/${meta.id}.result

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-tai: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
        translationai: 0.0.1
    END_VERSIONS
    """
}
81 changes: 81 additions & 0 deletions modules/wdl/xorf/chunk/main.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (c) 2026 The Hiller Lab at the Senckenberg Gesellschaft für Naturforschung
# Distributed under the terms of the Apache License, Version 2.0.

# CHUNKER — Splits genomic regions (BED/GTF/GFF) and sequences (2bit/FA/FA.GZ)
# into chunks for parallel processing. Allows extending the extracted chunk by a given
# number of upstream and downstream nucleotides. Additionally, it allows specifying
# the number of chunks to be generated.

version 1.3

# Containerized wrapper around `orf chunk`: splits `regions` (BED/GTF/GFF)
# over `sequence` (2bit/FA/FA.GZ) into `chunk_size` pieces under tmp/,
# extending each chunk by the given flank sizes.
task chunk {
input {
# Sample identifier. NOTE(review): not referenced inside the command;
# presumably kept for provenance — confirm it is intentional.
String meta_id
# Chromosome / contig label; also the default output file prefix.
String meta_chr
# Regions to split (BED/GTF/GFF).
File regions
# Sequence the regions refer to (2bit/FA/FA.GZ).
File sequence
# Number of chunks to produce.
Int chunk_size
# Flank (nt) added upstream of each extracted chunk.
Int upstream = 1000
# Flank (nt) added downstream of each extracted chunk.
Int downstream = 1000
# Basename prefix for the chunk files written under tmp/.
String prefix = meta_chr
}

command <<<
set -euo pipefail

orf chunk \
--regions ~{regions} \
--sequence ~{sequence} \
--chunks ~{chunk_size} \
-u ~{upstream} \
-d ~{downstream} \
--prefix ~{prefix} \
--ignore-errors

cat <<-END_VERSIONS > versions.yml
"CHUNKER":
orf-chunk: $(orf --version 2>&1 | sed 's/^.*orf //; s/ .*$//')
END_VERSIONS
>>>

output {
# Chunked region files written by `orf chunk` under tmp/.
Array[File] chunked_regions = glob("tmp/*bed")
# Chunked sequence files written by `orf chunk` under tmp/.
Array[File] chunked_sequences = glob("tmp/*fa")
# Tool-version manifest generated by the command block.
File versions = "versions.yml"
}

requirements {
container: "ghcr.io/alejandrogzi/orf-chunk:latest"
}
}

# Thin single-task workflow wrapping `chunk`: exposes its inputs one-to-one
# and re-exports its outputs unchanged.
workflow run {
input {
# See task `chunk` for the meaning of each input; defaults mirror the task.
String meta_id
String meta_chr
File regions
File sequence
Int chunk_size
Int upstream = 1000
Int downstream = 1000
String prefix = meta_chr
}

call chunk {
input:
meta_id = meta_id,
meta_chr = meta_chr,
regions = regions,
sequence = sequence,
chunk_size = chunk_size,
upstream = upstream,
downstream = downstream,
prefix = prefix
}

output {
# Direct pass-through of the task outputs.
Array[File] chunked_regions = chunk.chunked_regions
Array[File] chunked_sequences = chunk.chunked_sequences
File versions = chunk.versions
}
}
Loading
Loading