Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions modules/nextflow/xorf/chunk/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CHUNKER — Splits genomic regions (BED/GTF/GFF) and sequences (2bit/FA/FA.GZ)
into chunks for parallel processing. Allows extending the extracted chunk by a given
number of upstream and downstream nucleotides. Additionally, it allows specifying
the number of chunks to be generated.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

process CHUNKER {
    tag "$meta.id:$meta.chr"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-chunk:latest'

    input:
    // meta is expected to carry `id` and `chr` (used by tag and prefix)
    tuple val(meta), path(regions)
    tuple val(meta1), path(sequence)
    val(chunk_size)

    output:
    // Both outputs are optional: `orf chunk --ignore-errors` may emit no
    // chunks for an empty or fully-invalid region set.
    tuple val(meta), path('tmp/*bed'), optional: true, emit: chunked_regions
    tuple val(meta), path('tmp/*fa'), optional: true, emit: chunked_sequences
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Flank sizes (nt) added upstream/downstream of each extracted chunk.
    def upstream = task.ext.upstream ?: 1000
    def downstream = task.ext.downstream ?: 1000
    def prefix = task.ext.prefix ?: meta.chr
    """
    orf chunk \\
        --regions $regions \\
        --sequence $sequence \\
        --chunks $chunk_size \\
        -u $upstream \\
        -d $downstream \\
        --prefix $prefix \\
        --ignore-errors

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-chunk: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // FIX: the original stub ran `touch tmp` (creating a regular *file*) and
    // then `touch tmp/*bed`, which fails: tmp is not a directory and the glob
    // cannot expand. Create the directory and concrete placeholder files
    // matching the output globs instead.
    def prefix = task.ext.prefix ?: meta.chr
    """
    mkdir -p tmp
    touch tmp/${prefix}.bed
    touch tmp/${prefix}.fa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-chunk: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
    END_VERSIONS
    """
}
53 changes: 53 additions & 0 deletions modules/nextflow/xorf/netstart2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NETSTART2 — Predicts translation initiation sites using neural networks
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

process NETSTART2 {
    tag "$meta.id:$meta.name"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-net:latest'

    input:
    tuple val(meta), path(sequence)
    tuple val(meta1), path(bed)

    output:
    // NOTE(review): the emitted tuple carries meta1 (from the bed channel)
    // while the file pattern is keyed on meta.id (from the sequence channel);
    // confirm the two channels are meant to be paired this way.
    tuple val(meta1), path("${meta.id}*csv"), optional: true, emit: netstart
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    netstart2 \\
        -in $sequence \\
        -compute_device cpu \\
        -o chordata \\
        -out ${meta.id}_netstart \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        netstart2: \$(netstart2 --version 2>&1 | sed 's/.*Version: //')
    END_VERSIONS
    """

    stub:
    // FIX: `touch ${meta.id}*` creates a single file literally named
    // "<id>*" (the glob cannot expand in an empty work dir), which does not
    // match the "${meta.id}*csv" output pattern. Create a concrete CSV.
    """
    touch ${meta.id}_netstart.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        netstart2: \$(netstart2 --version 2>&1 | sed 's/.*Version: //')
    END_VERSIONS
    """
}
68 changes: 68 additions & 0 deletions modules/nextflow/xorf/rnasamba/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RNASAMBA — Classifies ORFs as coding or non-coding using RNAsamba machine learning
models through a Rust wrapper. Requires specifying the upstream and downstream
number of nucleotides extended from the incoming file.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

process RNASAMBA {
    tag "$meta.id:$meta.name"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-samba:latest'

    input:
    tuple val(meta), path(bed), path(sequence)
    tuple val(meta1), path(weights)

    output:
    tuple val(meta), path("${meta.id}/*tsv") , optional: true, emit: samba
    tuple val(meta), path("${meta.id}/*strip.fa") , optional: true, emit: fasta
    // Pass-through of the incoming BED so downstream steps keep it paired
    // with the classification results.
    tuple val(meta), path(bed) , optional: true, emit: bed
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Flank sizes (nt) must match the values used to extend the incoming
    // sequences (e.g. by CHUNKER) so the wrapper can strip them back off.
    def upstream = task.ext.upstream ?: 1000
    def downstream = task.ext.downstream ?: 1000
    """
    orf samba \\
        --fasta $sequence \\
        --outdir ${meta.id} \\
        --upstream-flank $upstream \\
        --downstream-flank $downstream \\
        --weights $weights \\
        $args

    mv ${meta.id}/samba/*tsv ${meta.id}/${meta.id}.${meta.name}.samba.tsv && rm -rf ${meta.id}/samba
    mv ${meta.name}.tmp.strip.fa ${meta.id}/${meta.id}.${meta.name}.strip.fa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-samba: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
        rnasamba: \$(rnasamba --version 2>&1 | tail -n 1 | sed 's/^rnasamba //')
    END_VERSIONS
    """

    stub:
    // FIX: the original stub touched "${meta.id}" as a regular *file* and
    // then tried to create files inside it via unexpandable globs, which
    // fails. Create the directory and placeholders that match the output
    // patterns produced by the script block above.
    """
    mkdir -p ${meta.id}
    touch ${meta.id}/${meta.id}.${meta.name}.samba.tsv
    touch ${meta.id}/${meta.id}.${meta.name}.strip.fa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-samba: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
        rnasamba: \$(rnasamba --version 2>&1 | tail -n 1 | sed 's/^rnasamba //')
    END_VERSIONS
    """
}
56 changes: 56 additions & 0 deletions modules/nextflow/xorf/transaid/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
TRANSAID — Predicts translation initiation sites using TransAID deep learning
models.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

process TRANSAID {
    tag "$meta.id:$meta.name"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-net:latest'

    input:
    tuple val(meta), path(sequence)
    tuple val(meta1), path(bed)

    output:
    tuple val(meta1), path("*csv") , optional: true, emit: transaid
    path "versions.yml" , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    transaid \\
        --input $sequence \\
        --gpu -1 \\
        --output ${meta.id}_transaid \\
        $args

    # NOTE(review): assumes transaid emits exactly one CSV — confirm;
    # `mv` would fail here if several were produced.
    mv *csv ${meta.id}.${meta.name}.transaid.csv
    # FIX: -f so the task does not die under bash -e when no .faa is written
    rm -f *.faa

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        transaid: \$(transaid --version 2>&1 | sed 's/.*Version: //')
    END_VERSIONS
    """

    stub:
    // FIX: `touch ${meta.id}*` creates a literal file named "<id>*"; create
    // the concrete CSV the script block would produce so the "*csv" output
    // pattern matches.
    """
    touch ${meta.id}.${meta.name}.transaid.csv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        transaid: \$(transaid --version 2>&1 | sed 's/.*Version: //')
    END_VERSIONS
    """
}
61 changes: 61 additions & 0 deletions modules/nextflow/xorf/translationai/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (c) 2025 Alejandro Gonzales-Irribarren <alejandrxgzi@gmail.com>
// Distributed under the terms of the Apache License, Version 2.0.

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
TRANSLATIONAI — Runs translational inference (TAI) on ORF predictions through a
Rust wrapper. Requires specifying the upstream and downstream number of nucleotides
extended from the incoming file.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// NOTE(review): the module path (translationai/) and header say
// TRANSLATIONAI but the process is named TRANSLATION; renaming would break
// existing `include` statements, so the mismatch is only flagged here.
process TRANSLATION {
    tag "$meta.id:$meta.name"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container 'ghcr.io/alejandrogzi/orf-tai:latest'

    input:
    tuple val(meta), path(bed), path(sequence)

    output:
    // bed and sequence are passed through alongside the TAI results so
    // downstream steps receive the full triplet.
    tuple val(meta), path(bed), path(sequence), path("${meta.id}/*result"), optional: true, emit: predictions
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Flank sizes (nt) must match the extension applied to the incoming file.
    def upstream = task.ext.upstream ?: 1000
    def downstream = task.ext.downstream ?: 1000
    """
    orf tai \\
        --fasta $sequence \\
        --bed $bed \\
        --outdir ${meta.id} \\
        -u $upstream \\
        -d $downstream

    mv ${meta.id}/tai/*result ${meta.id}/ && rm -rf ${meta.id}/tai

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-tai: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
        translationai: 0.0.1
    END_VERSIONS
    """

    stub:
    // FIX: the original stub touched "${meta.id}" as a regular *file* and
    // then used unexpandable globs beneath it, which fails. Create the
    // directory and a placeholder matching the "${meta.id}/*result" pattern.
    """
    mkdir -p ${meta.id}
    touch ${meta.id}/${meta.id}.result

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        orf-tai: \$(orf --version 2>&1 | sed 's/^.*orf //; s/ .*\$//')
        translationai: 0.0.1
    END_VERSIONS
    """
}
81 changes: 81 additions & 0 deletions modules/wdl/xorf/chunk/main.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (c) 2026 The Hiller Lab at the Senckenberg Gesellschaft für Naturforschung
# Distributed under the terms of the Apache License, Version 2.0.

# CHUNKER — Splits genomic regions (BED/GTF/GFF) and sequences (2bit/FA/FA.GZ)
# into chunks for parallel processing. Allows extending the extracted chunk by a given
# number of upstream and downstream nucleotides. Additionally, it allows specifying
# the number of chunks to be generated.

version 1.3

# Containerized wrapper around `orf chunk`: splits `regions` (BED/GTF/GFF)
# over `sequence` (2bit/FA/FA.GZ) into `chunk_size` pieces under tmp/,
# extending each chunk by the given flank sizes.
task chunk {
input {
# Sample identifier. NOTE(review): not referenced inside the command;
# presumably kept for provenance — confirm it is intentional.
String meta_id
# Chromosome / contig label; also the default output file prefix.
String meta_chr
# Regions to split (BED/GTF/GFF).
File regions
# Sequence the regions refer to (2bit/FA/FA.GZ).
File sequence
# Number of chunks to produce.
Int chunk_size
# Flank (nt) added upstream of each extracted chunk.
Int upstream = 1000
# Flank (nt) added downstream of each extracted chunk.
Int downstream = 1000
# Basename prefix for the chunk files written under tmp/.
String prefix = meta_chr
}

command <<<
set -euo pipefail

orf chunk \
--regions ~{regions} \
--sequence ~{sequence} \
--chunks ~{chunk_size} \
-u ~{upstream} \
-d ~{downstream} \
--prefix ~{prefix} \
--ignore-errors

cat <<-END_VERSIONS > versions.yml
"CHUNKER":
orf-chunk: $(orf --version 2>&1 | sed 's/^.*orf //; s/ .*$//')
END_VERSIONS
>>>

output {
# Chunked region files written by `orf chunk` under tmp/.
Array[File] chunked_regions = glob("tmp/*bed")
# Chunked sequence files written by `orf chunk` under tmp/.
Array[File] chunked_sequences = glob("tmp/*fa")
# Tool-version manifest generated by the command block.
File versions = "versions.yml"
}

requirements {
container: "ghcr.io/alejandrogzi/orf-chunk:latest"
}
}

# Thin single-task workflow wrapping `chunk`: exposes its inputs one-to-one
# and re-exports its outputs unchanged.
workflow run {
input {
# See task `chunk` for the meaning of each input; defaults mirror the task.
String meta_id
String meta_chr
File regions
File sequence
Int chunk_size
Int upstream = 1000
Int downstream = 1000
String prefix = meta_chr
}

call chunk {
input:
meta_id = meta_id,
meta_chr = meta_chr,
regions = regions,
sequence = sequence,
chunk_size = chunk_size,
upstream = upstream,
downstream = downstream,
prefix = prefix
}

output {
# Direct pass-through of the task outputs.
Array[File] chunked_regions = chunk.chunked_regions
Array[File] chunked_sequences = chunk.chunked_sequences
File versions = chunk.versions
}
}
Loading
Loading