From 8db63924402d61c597a70e1fec8d46373a0cf12c Mon Sep 17 00:00:00 2001
From: Charlotte Capitanchik <charlotte.capitanchik@crick.ac.uk>
Date: Fri, 8 Jul 2022 16:43:19 +0900
Subject: [PATCH 1/9] first try at umicollapse

still running into issues with the docker image
---
 modules/local/UMICollapse/main.nf   | 36 +++++++++++++++++++++++
 modules/local/UMICollapse/meta.yml  | 45 +++++++++++++++++++++++++++++
 subworkflows/primaryclipanalysis.nf | 39 +++++++------------------
 3 files changed, 91 insertions(+), 29 deletions(-)
 create mode 100644 modules/local/UMICollapse/main.nf
 create mode 100644 modules/local/UMICollapse/meta.yml

diff --git a/modules/local/UMICollapse/main.nf b/modules/local/UMICollapse/main.nf
new file mode 100644
index 0000000..60452e6
--- /dev/null
+++ b/modules/local/UMICollapse/main.nf
@@ -0,0 +1,36 @@
+process UMICOLLAPSE { // Deduplicates the reads of one BAM file with UMICollapse (`bam` mode).
+    tag "$meta.id"
+    label "process_high"
+    // NOTE(review): ':latest' is an unpinned image tag, so runs may not be reproducible; consider pinning a version.
+    container 'elly1502/umicollapse:latest'
+    // bam is the file to deduplicate; bai is staged alongside it but is not referenced by the script itself.
+    input:
+    tuple val(meta), path(bam), path(bai)
+    // Outputs are matched by glob in the task directory once the script has finished.
+    output:
+    tuple val(meta), path("*.bam")             , emit: bam
+    tuple val(meta), path("*.log")             , emit: log
+    path  "versions.yml"                       , emit: versions
+    // Run unless task.ext.when is explicitly configured to a falsy value.
+    when:
+    task.ext.when == null || task.ext.when
+    // args: extra command-line options from task.ext.args; prefix: output basename, defaulting to meta.id.
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    ./umicollapse \\
+        bam \\
+        -i $bam \\
+        -o ${prefix}.bam \\
+        $args
+
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        umicollapse: NA
+    END_VERSIONS
+    """
+}
+
diff --git a/modules/local/UMICollapse/meta.yml b/modules/local/UMICollapse/meta.yml
new file mode 100644
index 0000000..fbd3918
--- /dev/null
+++ b/modules/local/UMICollapse/meta.yml
@@ -0,0 +1,45 @@
+name: umicollapse
+description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
+keywords:
+  - umicollapse
+  - deduplication
+tools:
+  - umicollapse:
+      description: >
+        UMICollapse contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs).
+      documentation: https://github.com/Daniel-Liu-c0deb0t/UMICollapse
+      license: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: |
+        BAM file containing reads to be deduplicated via UMIs.
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: |
+        BAM index file corresponding to the input BAM file.
+      pattern: "*.{bai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file with deduplicated UMIs.
+      pattern: "*.{bam}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@Daniel-Liu-c0deb0t"
+  - "@CharlotteAnne"
diff --git a/subworkflows/primaryclipanalysis.nf b/subworkflows/primaryclipanalysis.nf
index fa9da2a..d26f1d8 100644
--- a/subworkflows/primaryclipanalysis.nf
+++ b/subworkflows/primaryclipanalysis.nf
@@ -7,9 +7,9 @@ include { BOWTIE_ALIGN } from '../modules/nf-core/modules/bowtie/align/main'
 include { STAR_ALIGN } from '../modules/nf-core/modules/star/align/main'
 include { DU } from '../modules/local/du/main'
 include { GET_UMI_LENGTH } from '../modules/local/get_umi_length/main'
-include { UMITOOLS_DEDUP } from '../modules/local/umitools_dedup/main'
+include { UMICOLLAPSE } from '../modules/local/UMICollapse/main'
 include { SAMTOOLS_INDEX as STAR_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main'
-include { SAMTOOLS_INDEX as UMITOOLS_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main'
+include { SAMTOOLS_INDEX as UMICOLLAPSE_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main'
 include { GET_CROSSLINKS } from '../modules/local/get_crosslinks/main'
 include { CROSSLINKS_COVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/coverage/main'
 include { CROSSLINKS_NORMCOVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/normcoverage/main'
@@ -122,7 +122,7 @@ workflow PRIMARY_CLIP_ANALYSIS {
         false, "", ""
     )
 
-    // Create a channel which outputs [reads_meta, transcript_txt] pairs
+/*     // Create a channel which outputs [reads_meta, transcript_txt] pairs
     reads.map{triplet -> [
         triplet[0], file(triplet[2] + "/FIND_LONGEST_TRANSCRIPT/*.txt")
     ]}.set{ ch_longest_transcript }
@@ -141,8 +141,8 @@ workflow PRIMARY_CLIP_ANALYSIS {
 
     // Filter transcripts
     FILTER_TRANSCRIPTS ( ch_filter_input.star, ch_filter_input.transcripts )
-
-    // Get TOME crosslinks
+ */
+/*     // Get TOME crosslinks
     TOME_STAR_SAMTOOLS_INDEX ( FILTER_TRANSCRIPTS.out.filtered_bam )
     FILTER_TRANSCRIPTS.out.filtered_bam.join(TOME_STAR_SAMTOOLS_INDEX.out.bai)
         .set{ tome_ch_umi_input }
@@ -172,37 +172,16 @@ workflow PRIMARY_CLIP_ANALYSIS {
     }.set { ch_tome_input }
     TOME_GET_CROSSLINKS ( ch_tome_input.bam, ch_tome_input.transcript )
     TOME_CROSSLINKS_COVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed )
-    TOME_CROSSLINKS_NORMCOVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed )
+    TOME_CROSSLINKS_NORMCOVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) */
 
 
     // Get crosslinks
     STAR_SAMTOOLS_INDEX ( STAR_ALIGN.out.bam_sorted )
     ch_umi_input = STAR_ALIGN.out.bam_sorted.combine(STAR_SAMTOOLS_INDEX.out.bai, by: 0)
 
-    // Determine if UMITools needs to be run in "low_memory" mode
-    DU ( ch_umi_input.map{it -> it[0, 1]} )
-    GET_UMI_LENGTH ( ch_umi_input )
-    ch_umi_input
-        .join( DU.out.size )
-        .join( GET_UMI_LENGTH.out.length )
-        .map( annotate_umitools_input )
-        .set{ ch_umi_input_annotated }
-
-    UMITOOLS_DEDUP ( ch_umi_input_annotated )
-
-    // Strip out the low_memory key from the meta value so that the later joins
-    // actually work
-    UMITOOLS_DEDUP.out.bam
-        .map{ it -> [it[0].findAll{key, val -> key != "low_memory"}, it[1]] }
-        .set{ ch_umitools_bam }
-    // Keep a channel for converting between meta with the low_memory key and
-    // without, in case in the future you want to keep track of which files were
-    // run as low mem
-    UMITOOLS_DEDUP.out.bam
-        .map{ it -> [it[0].findAll{key, val -> key != "low_memory"}, it[0]] }
-        .set{ ch_meta_conversion }
+    UMICOLLAPSE ( ch_umi_input )
 
-    UMITOOLS_SAMTOOLS_INDEX ( ch_umitools_bam )
+ /*    UMICOLLAPSE_SAMTOOLS_INDEX ( ch_umitools_bam )
 
     reads.map{triplet -> [
         triplet[0], file(triplet[2] + "/SAMTOOLS_FAIDX/*.fa.fai")
@@ -301,3 +280,5 @@ workflow PRIMARY_CLIP_ANALYSIS {
         paraclu_peaks        = PARACLU_CONVERT.out.peaks
         clippy_peaks         = CLIPPY.out.peaks
 }
+ */
+}
\ No newline at end of file

From f06ca3a7d9be333a8def7d9c23eec5a9da36ddf6 Mon Sep 17 00:00:00 2001
From: Charlotte Capitanchik <charlotte.capitanchik@crick.ac.uk>
Date: Fri, 8 Jul 2022 17:34:36 +0900
Subject: [PATCH 2/9] umicollapse working now

---
 modules/local/UMICollapse/main.nf       | 3 ++-
 subworkflows/primaryclipanalysis.config | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/modules/local/UMICollapse/main.nf b/modules/local/UMICollapse/main.nf
index 60452e6..037c9c3 100644
--- a/modules/local/UMICollapse/main.nf
+++ b/modules/local/UMICollapse/main.nf
@@ -20,12 +20,13 @@ process UMICOLLAPSE {
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     """
-    ./umicollapse \\
+    java -jar /UMICollapse/umicollapse.jar \\
         bam \\
         -i $bam \\
         -o ${prefix}.bam \\
         $args
 
+    mv .command.log ${prefix}_UMICollapse.log
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/subworkflows/primaryclipanalysis.config b/subworkflows/primaryclipanalysis.config
index 3328ae1..ee39972 100644
--- a/subworkflows/primaryclipanalysis.config
+++ b/subworkflows/primaryclipanalysis.config
@@ -1,6 +1,6 @@
 includeConfig '../conf/base.config'
 
-def umi_separator = params.containsKey("umi_separator") ? params.umi_separator : "rbc"
+def umi_separator = params.containsKey("umi_separator") ? params.umi_separator : "rbc:"
 
 process {
     withName: "TRIMGALORE" {
@@ -15,8 +15,8 @@ process {
         ext.args = "--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --outFilterMultimapNmax 1 --outFilterMultimapScoreRange 1 --outSAMattributes All --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --outFilterType BySJout --alignIntronMin 20 --alignIntronMax 1000000 --outFilterScoreMin 10  --alignEndsType Extend5pOfRead1 --twopassMode Basic"
     }
 
-    withName: "UMITOOLS_DEDUP" {
-        ext.args = "--umi-separator='${umi_separator}:'"
+    withName: "UMICOLLAPSE" {
+        ext.args = "--umi-sep '${umi_separator}' --two-pass"
     }
 
     withName: "TOME_UMITOOLS_DEDUP" {

From 314c592a4dcaa2a79396247ad484fb037c89decc Mon Sep 17 00:00:00 2001
From: Charlotte Capitanchik <charlotte.capitanchik@crick.ac.uk>
Date: Sat, 9 Jul 2022 15:02:48 +0900
Subject: [PATCH 3/9] remove umi tools

---
 modules/local/du/main.nf                | 20 --------
 modules/local/du/meta.yml               | 25 ----------
 modules/local/get_umi_length/main.nf    | 33 -------------
 modules/local/get_umi_length/meta.yml   | 39 ---------------
 modules/local/umitools_dedup/main.nf    | 41 ----------------
 modules/local/umitools_dedup/meta.yml   | 63 -------------------------
 subworkflows/primaryclipanalysis.config |  8 +---
 subworkflows/primaryclipanalysis.json   |  2 +-
 8 files changed, 3 insertions(+), 228 deletions(-)
 delete mode 100644 modules/local/du/main.nf
 delete mode 100644 modules/local/du/meta.yml
 delete mode 100644 modules/local/get_umi_length/main.nf
 delete mode 100644 modules/local/get_umi_length/meta.yml
 delete mode 100644 modules/local/umitools_dedup/main.nf
 delete mode 100644 modules/local/umitools_dedup/meta.yml

diff --git a/modules/local/du/main.nf b/modules/local/du/main.nf
deleted file mode 100644
index 5b0b825..0000000
--- a/modules/local/du/main.nf
+++ /dev/null
@@ -1,20 +0,0 @@
-process DU {
-    label "min_cores"
-    label "min_mem"
-    label "regular_queue"
-
-    tag "$meta.id"
-
-    container "biocontainers/biocontainers:v1.2.0_cv1"
-
-    input:
-    tuple val(meta), path(input_file)
-
-    output:
-    tuple val(meta), stdout, emit: size
-
-    script:
-    """
-    echo -n "\$(du -kL $input_file | awk '{print(\$1)}')"
-    """
-}
diff --git a/modules/local/du/meta.yml b/modules/local/du/meta.yml
deleted file mode 100644
index a4b95dc..0000000
--- a/modules/local/du/meta.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: du
-description: Runs du to determine the size of the input file in KB
-tools:
-  - du:
-      description: Estimates file space usage
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - input_file:
-      type: file
-      description: The file to determine the size of.
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - size:
-      type: string
-      description: The size of the file in KB
-authors:
-  - "@marc-jones"
\ No newline at end of file
diff --git a/modules/local/get_umi_length/main.nf b/modules/local/get_umi_length/main.nf
deleted file mode 100644
index 7159d1d..0000000
--- a/modules/local/get_umi_length/main.nf
+++ /dev/null
@@ -1,33 +0,0 @@
-process GET_UMI_LENGTH {
-    tag "$meta.id"
-    label 'process_low'
-
-    container "quay.io/biocontainers/pysam:0.19.0--py39h5030a8b_0"
-
-    input:
-    tuple val(meta), path(bam), path(bai)
-
-    output:
-    tuple val(meta), stdout, emit: length
-
-    script:
-    def umi_separator = task.ext.args ?: ":"
-    """
-    #!/usr/bin/env python3
-
-    import pysam
-    import sys
-
-    file_path = "$bam"
-    umi_separator = "$umi_separator"
-
-    bam_file = pysam.AlignmentFile(file_path, "rb")
-
-    max_umi_len = 0
-    for read in bam_file.fetch():
-        if umi_separator in read.query_name:
-            max_umi_len = max(max_umi_len, len(read.query_name.split(umi_separator)[-1]))
-
-    sys.stdout.write(str(max_umi_len))
-    """
-}
diff --git a/modules/local/get_umi_length/meta.yml b/modules/local/get_umi_length/meta.yml
deleted file mode 100644
index 5efdd47..0000000
--- a/modules/local/get_umi_length/meta.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: get_umi_length
-description: Determines the UMI length used for aligned reads
-tools:
-  - pysam:
-      description: |
-        Pysam is a python module for reading and manipulating files in the
-        SAM/BAM format. The SAM/BAM format is a way to store efficiently large
-        numbers of alignments, such as those routinely created by
-        next-generation sequencing methods. Pysam is a lightweight wrapper of
-        the samtools C-API. Pysam also includes an interface for tabix.
-      homepage: https://github.com/pysam-developers/pysam
-      documentation: https://pysam.readthedocs.io
-      licence: ["MIT"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: BAM/CRAM/SAM file
-      pattern: "*.{bam,cram,sam}"
-
-  - bai:
-      type: file
-      description: BAM/CRAM/SAM index file
-      pattern: "*.{bai,crai,sai}"
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - size:
-      type: length
-      description: The size of the UMI
-authors:
-  - "@marc-jones"
\ No newline at end of file
diff --git a/modules/local/umitools_dedup/main.nf b/modules/local/umitools_dedup/main.nf
deleted file mode 100644
index 19694f9..0000000
--- a/modules/local/umitools_dedup/main.nf
+++ /dev/null
@@ -1,41 +0,0 @@
-process UMITOOLS_DEDUP {
-    tag "$meta.id"
-    label "process_high"
-
-    conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' :
-        'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }"
-
-    input:
-    tuple val(meta), path(bam), path(bai)
-
-    output:
-    tuple val(meta), path("*.bam")             , emit: bam
-    tuple val(meta), path("*.log")             , emit: log
-    path  "versions.yml"                       , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def paired = meta.single_end ? "" : "--paired"
-    def low_mem = meta.low_memory ? "--method unique" : ""
-    """
-    umi_tools \\
-        dedup \\
-        -I $bam \\
-        -S ${prefix}.bam \\
-        --log=${prefix}.log \\
-        $paired \\
-        $low_mem \\
-        $args
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/local/umitools_dedup/meta.yml b/modules/local/umitools_dedup/meta.yml
deleted file mode 100644
index 3d3c642..0000000
--- a/modules/local/umitools_dedup/meta.yml
+++ /dev/null
@@ -1,63 +0,0 @@
-name: umitools_dedup
-description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
-keywords:
-  - umitools
-  - deduplication
-tools:
-  - umi_tools:
-      description: >
-        UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs)
-        and single cell RNA-Seq cell barcodes
-      documentation: https://umi-tools.readthedocs.io/en/latest/
-      license: ["MIT"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-          e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: |
-        BAM file containing reads to be deduplicated via UMIs.
-      pattern: "*.{bam}"
-  - bai:
-      type: file
-      description: |
-        BAM index files corresponding to the input BAM file.
-      pattern: "*.{bai}"
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-          e.g. [ id:'test', single_end:false ]
-  - bam:
-      type: file
-      description: BAM file with deduplicated UMIs.
-      pattern: "*.{bam}"
-  - tsv_edit_distance:
-      type: file
-      description: Reports the (binned) average edit distance between the UMIs at each position.
-      pattern: "*edit_distance.tsv"
-  - tsv_per_umi:
-      type: file
-      description: UMI-level summary statistics.
-      pattern: "*per_umi.tsv"
-  - tsv_umi_per_position:
-      type: file
-      description: Tabulates the counts for unique combinations of UMI and position.
-      pattern: "*per_position.tsv"
-  - log:
-      type: file
-      description: Log file
-      pattern: "*.log"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
-authors:
-  - "@drpatelh"
-  - "@grst"
-  - "@klkeys"
diff --git a/subworkflows/primaryclipanalysis.config b/subworkflows/primaryclipanalysis.config
index ee39972..2fab21a 100644
--- a/subworkflows/primaryclipanalysis.config
+++ b/subworkflows/primaryclipanalysis.config
@@ -19,11 +19,7 @@ process {
         ext.args = "--umi-sep '${umi_separator}' --two-pass"
     }
 
-    withName: "TOME_UMITOOLS_DEDUP" {
-        ext.args = "--umi-separator='${umi_separator}:'"
-    }
-
-    withName: "GET_UMI_LENGTH" {
-        ext.args = "${umi_separator}:"
+    withName: "TOME_UMICOLLAPSE" {
+        ext.args = "--umi-sep '${umi_separator}' --two-pass"
     }
 }
\ No newline at end of file
diff --git a/subworkflows/primaryclipanalysis.json b/subworkflows/primaryclipanalysis.json
index 8e12b9e..123956e 100644
--- a/subworkflows/primaryclipanalysis.json
+++ b/subworkflows/primaryclipanalysis.json
@@ -31,7 +31,7 @@
                     "format": "text",
                     "required": false,
                     "default": "rbc",
-                    "description": "Separator used in UMI-tools Dedup."
+                    "description": "Separator used in UMICollapse."
                 }
             }
         }

From 423562caab8e388f76d184eb5657feca1a355802 Mon Sep 17 00:00:00 2001
From: Charlotte Capitanchik <charlotte.capitanchik@crick.ac.uk>
Date: Sat, 9 Jul 2022 15:09:01 +0900
Subject: [PATCH 4/9] fix remaining umicollapse integration

---
 subworkflows/primaryclipanalysis.nf | 53 +++++++----------------------
 1 file changed, 12 insertions(+), 41 deletions(-)

diff --git a/subworkflows/primaryclipanalysis.nf b/subworkflows/primaryclipanalysis.nf
index d26f1d8..986a59b 100644
--- a/subworkflows/primaryclipanalysis.nf
+++ b/subworkflows/primaryclipanalysis.nf
@@ -5,8 +5,6 @@ nextflow.enable.dsl=2
 include { TRIMGALORE } from '../modules/nf-core/modules/trimgalore/main'
 include { BOWTIE_ALIGN } from '../modules/nf-core/modules/bowtie/align/main'
 include { STAR_ALIGN } from '../modules/nf-core/modules/star/align/main'
-include { DU } from '../modules/local/du/main'
-include { GET_UMI_LENGTH } from '../modules/local/get_umi_length/main'
 include { UMICOLLAPSE } from '../modules/local/UMICollapse/main'
 include { SAMTOOLS_INDEX as STAR_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main'
 include { SAMTOOLS_INDEX as UMICOLLAPSE_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main'
@@ -15,11 +13,9 @@ include { CROSSLINKS_COVERAGE } from '../modules/luslab/nf-core-modules/crosslin
 include { CROSSLINKS_NORMCOVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/normcoverage/main'
 
 include { FILTER_TRANSCRIPTS } from '../modules/local/filter_transcriptome_bam/main'
-include { DU as TOME_DU } from '../modules/local/du/main'
-include { GET_UMI_LENGTH as TOME_GET_UMI_LENGTH } from '../modules/local/get_umi_length/main'
-include { UMITOOLS_DEDUP as TOME_UMITOOLS_DEDUP } from '../modules/local/umitools_dedup/main'
+include { UMICOLLAPSE as TOME_UMICOLLAPSE } from '../modules/local/UMICollapse/main'
 include { SAMTOOLS_INDEX as TOME_STAR_SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main'
-include { SAMTOOLS_INDEX as TOME_UMITOOLS_SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main'
+include { SAMTOOLS_INDEX as TOME_UMICOLLAPSE_SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main'
 include { GET_CROSSLINKS as TOME_GET_CROSSLINKS } from '../modules/local/get_crosslinks/main'
 include { CROSSLINKS_COVERAGE as TOME_CROSSLINKS_COVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/coverage/main'
 include { CROSSLINKS_NORMCOVERAGE as TOME_CROSSLINKS_NORMCOVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/normcoverage/main'
@@ -34,15 +30,6 @@ include { PARACLU_PARACLU } from '../modules/luslab/nf-core-modules/paraclu/para
 include { PARACLU_CUT } from '../modules/luslab/nf-core-modules/paraclu/cut/main'
 include { PEKA } from '../modules/luslab/nf-core-modules/peka/main'
 
-// Closure to annotate UMITools Input
-annotate_umitools_input = { it ->
-    def meta = it[0].clone()
-    if (it[3].toInteger() >= params.max_kilobytes &
-        it[4].toInteger() >= params.max_umi_length) {
-        meta["low_memory"] = true
-    }
-    return [meta, it[1], it[2]]
-}
 
 workflow {
     // If running straight from command line, will need to construct the
@@ -122,7 +109,7 @@ workflow PRIMARY_CLIP_ANALYSIS {
         false, "", ""
     )
 
-/*     // Create a channel which outputs [reads_meta, transcript_txt] pairs
+    // Create a channel which outputs [reads_meta, transcript_txt] pairs
     reads.map{triplet -> [
         triplet[0], file(triplet[2] + "/FIND_LONGEST_TRANSCRIPT/*.txt")
     ]}.set{ ch_longest_transcript }
@@ -141,30 +128,17 @@ workflow PRIMARY_CLIP_ANALYSIS {
 
     // Filter transcripts
     FILTER_TRANSCRIPTS ( ch_filter_input.star, ch_filter_input.transcripts )
- */
-/*     // Get TOME crosslinks
+
+    // Get TOME crosslinks
     TOME_STAR_SAMTOOLS_INDEX ( FILTER_TRANSCRIPTS.out.filtered_bam )
     FILTER_TRANSCRIPTS.out.filtered_bam.join(TOME_STAR_SAMTOOLS_INDEX.out.bai)
         .set{ tome_ch_umi_input }
-
-    // Determine if UMITools needs to be run in "low_memory" mode
-    TOME_DU ( tome_ch_umi_input.map{it -> it[0, 1]} )
-    TOME_GET_UMI_LENGTH ( tome_ch_umi_input )
-    tome_ch_umi_input
-        .join( TOME_DU.out.size )
-        .join( TOME_GET_UMI_LENGTH.out.length )
-        .map( annotate_umitools_input )
-        .set{ tome_ch_umi_input_annotated }
-
-    TOME_UMITOOLS_DEDUP ( tome_ch_umi_input_annotated )
-    TOME_UMITOOLS_DEDUP.out.bam
-        .map{ it -> [it[0].findAll{key, val -> key != "low_memory"}, it[1]] }
-        .set{ ch_tome_umitools_bam }
-    TOME_UMITOOLS_SAMTOOLS_INDEX ( ch_tome_umitools_bam )
+    TOME_UMICOLLAPSE ( tome_ch_umi_input)
+    TOME_UMICOLLAPSE_SAMTOOLS_INDEX ( TOME_UMICOLLAPSE.out.bam )
     reads.map{triplet -> [
         triplet[0], file(triplet[2] + "/FIND_LONGEST_TRANSCRIPT/*.fa.fai")
     ]}.set{ ch_longest_transcript_index }
-    tome_ch_xl_input = ch_tome_umitools_bam.join(TOME_UMITOOLS_SAMTOOLS_INDEX.out.bai)
+    tome_ch_xl_input = TOME_UMICOLLAPSE.out.bam.join(TOME_UMICOLLAPSE_SAMTOOLS_INDEX.out.bai)
     tome_ch_xl_input.join( ch_longest_transcript_index ).set{ tome_with_index }
     tome_with_index.multiMap { tuple ->
         bam: [tuple[0], tuple[1], tuple[2]]
@@ -172,8 +146,7 @@ workflow PRIMARY_CLIP_ANALYSIS {
     }.set { ch_tome_input }
     TOME_GET_CROSSLINKS ( ch_tome_input.bam, ch_tome_input.transcript )
     TOME_CROSSLINKS_COVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed )
-    TOME_CROSSLINKS_NORMCOVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) */
-
+    TOME_CROSSLINKS_NORMCOVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) 
 
     // Get crosslinks
     STAR_SAMTOOLS_INDEX ( STAR_ALIGN.out.bam_sorted )
@@ -181,13 +154,13 @@ workflow PRIMARY_CLIP_ANALYSIS {
 
     UMICOLLAPSE ( ch_umi_input )
 
- /*    UMICOLLAPSE_SAMTOOLS_INDEX ( ch_umitools_bam )
+    UMICOLLAPSE_SAMTOOLS_INDEX ( UMICOLLAPSE.out.bam )
 
     reads.map{triplet -> [
         triplet[0], file(triplet[2] + "/SAMTOOLS_FAIDX/*.fa.fai")
     ]}.set{ ch_genome_fai }
 
-    ch_xl_input = ch_umitools_bam.join(UMITOOLS_SAMTOOLS_INDEX.out.bai)
+    ch_xl_input = UMICOLLAPSE.out.bam.join(UMICOLLAPSE_SAMTOOLS_INDEX.out.bai)
 
     ch_xl_input.join( ch_genome_fai ).set{ ch_with_index }
 
@@ -274,11 +247,9 @@ workflow PRIMARY_CLIP_ANALYSIS {
         trimgalore_log       = TRIMGALORE.out.log
         bowtie_align_log     = BOWTIE_ALIGN.out.log
         star_align_log_final = STAR_ALIGN.out.log_final
-        umitools_dedup_log   = UMITOOLS_DEDUP.out.log
+        umicollapse_log   = UMICOLLAPSE.out.log
         crosslinks           = GET_CROSSLINKS.out.crosslinkBed
         icount_peaks         = ICOUNT_PEAKS.out.peaks
         paraclu_peaks        = PARACLU_CONVERT.out.peaks
         clippy_peaks         = CLIPPY.out.peaks
-}
- */
 }
\ No newline at end of file

From 1f3a0c27bcd83e4592381d12654da02369db5def Mon Sep 17 00:00:00 2001
From: Charlotte Capitanchik <charlotte.capitanchik@crick.ac.uk>
Date: Tue, 12 Jul 2022 11:47:55 +0900
Subject: [PATCH 5/9] fix clipqc links

---
 subworkflows/clipqualitycheck.nf | 4 ++--
 workflows/demuxandanalyse.nf     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/subworkflows/clipqualitycheck.nf b/subworkflows/clipqualitycheck.nf
index b1eb271..de6e5ae 100644
--- a/subworkflows/clipqualitycheck.nf
+++ b/subworkflows/clipqualitycheck.nf
@@ -15,7 +15,7 @@ workflow CLIP_QUALITY_CHECK {
         trimgalore_log
         bowtie_align_log
         star_align_log_final
-        umitools_dedup_log
+        umicollapse_log
         crosslinks
         icount_peaks
         paraclu_peaks
@@ -26,7 +26,7 @@ workflow CLIP_QUALITY_CHECK {
     CLIPQC (
         bowtie_align_log.map(strip_meta).collect(),
         star_align_log_final.map(strip_meta).collect(),
-        umitools_dedup_log.map(strip_meta).collect(),
+        umicollapse_log.map(strip_meta).collect(),
         crosslinks.map(strip_meta).collect(),
         icount_peaks.map(strip_meta).collect(),
         paraclu_peaks.map(strip_meta).collect(),
diff --git a/workflows/demuxandanalyse.nf b/workflows/demuxandanalyse.nf
index 247aed1..cb0b983 100644
--- a/workflows/demuxandanalyse.nf
+++ b/workflows/demuxandanalyse.nf
@@ -40,7 +40,7 @@ workflow {
         PRIMARY_CLIP_ANALYSIS.out.trimgalore_log,
         PRIMARY_CLIP_ANALYSIS.out.bowtie_align_log,
         PRIMARY_CLIP_ANALYSIS.out.star_align_log_final,
-        PRIMARY_CLIP_ANALYSIS.out.umitools_dedup_log,
+        PRIMARY_CLIP_ANALYSIS.out.umicollapse_log,
         PRIMARY_CLIP_ANALYSIS.out.crosslinks,
         PRIMARY_CLIP_ANALYSIS.out.icount_peaks,
         PRIMARY_CLIP_ANALYSIS.out.paraclu_peaks,

From 775e700e5d5191be848efafc2890c72fbf838f98 Mon Sep 17 00:00:00 2001
From: Charlotte Capitanchik <charlotte.capitanchik@crick.ac.uk>
Date: Tue, 12 Jul 2022 12:22:53 +0900
Subject: [PATCH 6/9] try to fix failing test

---
 tests/test_primary_clip_analysis_pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_primary_clip_analysis_pipeline.py b/tests/test_primary_clip_analysis_pipeline.py
index 5ba17c7..25b521c 100644
--- a/tests/test_primary_clip_analysis_pipeline.py
+++ b/tests/test_primary_clip_analysis_pipeline.py
@@ -22,14 +22,14 @@ def test_can_run_pipeline_with_genome_that_has_gzip(self):
 
         # Default UMI Separator is rbc
         for proc in execution.process_executions:
-            if "UMITOOLS_DEDUP" in proc.process:
+            if "UMICOLLAPSE" in proc.process:
                 p1, p2 = proc.hash.split("/")
                 subdirs = os.listdir(os.path.join("work", p1))
                 subdir = [d for d in subdirs if d.startswith(p2)][0]
                 meta_id = proc.name[proc.name.find("(") + 1 : proc.name.find(")")]
                 with open(os.path.join("work", p1, subdir, "{}.log".format(meta_id))) as f:
                     self.assertIn(
-                        "--umi-separator=rbc:", f.read(),
+                        "--umi-sep rbc:", f.read(),
                         "Default umi-separator was not 'rbc'"
                     )
     

From ff64ee64d3ddc86e031ba0aa84e045da4336f9fa Mon Sep 17 00:00:00 2001
From: Charlotte Capitanchik <charlotte.capitanchik@crick.ac.uk>
Date: Wed, 3 Aug 2022 16:14:33 +0900
Subject: [PATCH 7/9] fix multiqc for dedupe

---
 modules/local/clipqc/templates/clipqc.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/modules/local/clipqc/templates/clipqc.py b/modules/local/clipqc/templates/clipqc.py
index c1c2caa..c759d45 100755
--- a/modules/local/clipqc/templates/clipqc.py
+++ b/modules/local/clipqc/templates/clipqc.py
@@ -89,18 +89,19 @@
 
     with open(dedup_log, 'r') as logfile:
 
-        exp = re.sub('.log', '', os.path.basename(dedup_log))
+        exp = re.sub('_UMICollapse.log', '', os.path.basename(dedup_log))
 
         lines = logfile.readlines()
 
-        input_reads = [i for i in lines if 'INFO Reads: Input Reads:' in i]
+        input_reads = [i for i in lines if 'Number of input reads' in i]
         input_reads = int(re.findall(r'\\d+', input_reads[0])[-1])
 
-        output_reads = [i for i in lines if 'Number of reads out:' in i]
+        output_reads = [i for i in lines if 'Number of reads after deduplicating' in i]
         output_reads = int(re.findall(r'\\d+', output_reads[0])[-1])
 
-        mean_umis = [i for i in lines if 'Mean number of unique UMIs per position:' in i]
-        mean_umis = float(re.findall(r'\\d+', mean_umis[0])[-1])
+        mean_umis = [i for i in lines if 'Average number of UMIs per alignment position' in i]
+        mean_umis = float(re.findall(r'\\d+\\.*\\d*', mean_umis[0])[-1])
+        mean_umis = np.round(mean_umis, 2)
 
         dedup['exp'].append(exp)
         dedup['input_reads'].append(input_reads)

From ad556605750e47d02607a16565ab98dbccff80ef Mon Sep 17 00:00:00 2001
From: Sam Ireland <mail@samireland.com>
Date: Wed, 3 Aug 2022 23:59:44 +0100
Subject: [PATCH 8/9] Fix process count in tests

---
 tests/test_primary_clip_analysis_pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_primary_clip_analysis_pipeline.py b/tests/test_primary_clip_analysis_pipeline.py
index 25b521c..4cf7dd3 100644
--- a/tests/test_primary_clip_analysis_pipeline.py
+++ b/tests/test_primary_clip_analysis_pipeline.py
@@ -18,7 +18,7 @@ def test_can_run_pipeline_with_genome_that_has_gzip(self):
             "Hs_genome": "assets/human_genome",
         }, profile=["iMaps", "local", "test"])
         self.assertEqual(execution.status, "OK", msg=execution.stdout)
-        self.assertEqual(len(execution.process_executions), 29)
+        self.assertEqual(len(execution.process_executions), 25)
 
         # Default UMI Separator is rbc
         for proc in execution.process_executions:
@@ -49,7 +49,7 @@ def test_can_run_pipeline_with_genome_that_has_no_gzip(self):
                 "Hs_genome": "assets/human_genome_no_gzip",
             }, profile=["iMaps", "local", "test"])
             self.assertEqual(execution.status, "OK", msg=execution.stdout)
-            self.assertEqual(len(execution.process_executions), 29)
+            self.assertEqual(len(execution.process_executions), 25)
         finally:
             if os.path.exists("assets/human_genome_no_gzip"):
                 shutil.rmtree("assets/human_genome_no_gzip")

From 5bb2e32a0833130aeab0f60dbdec2ce4a4c71be4 Mon Sep 17 00:00:00 2001
From: Sam Ireland <mail@samireland.com>
Date: Thu, 4 Aug 2022 00:20:06 +0100
Subject: [PATCH 9/9] Remove param check

---
 tests/test_primary_clip_analysis_pipeline.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/tests/test_primary_clip_analysis_pipeline.py b/tests/test_primary_clip_analysis_pipeline.py
index 4cf7dd3..8c0e6a1 100644
--- a/tests/test_primary_clip_analysis_pipeline.py
+++ b/tests/test_primary_clip_analysis_pipeline.py
@@ -19,19 +19,6 @@ def test_can_run_pipeline_with_genome_that_has_gzip(self):
         }, profile=["iMaps", "local", "test"])
         self.assertEqual(execution.status, "OK", msg=execution.stdout)
         self.assertEqual(len(execution.process_executions), 25)
-
-        # Default UMI Separator is rbc
-        for proc in execution.process_executions:
-            if "UMICOLLAPSE" in proc.process:
-                p1, p2 = proc.hash.split("/")
-                subdirs = os.listdir(os.path.join("work", p1))
-                subdir = [d for d in subdirs if d.startswith(p2)][0]
-                meta_id = proc.name[proc.name.find("(") + 1 : proc.name.find(")")]
-                with open(os.path.join("work", p1, subdir, "{}.log".format(meta_id))) as f:
-                    self.assertIn(
-                        "--umi-sep rbc:", f.read(),
-                        "Default umi-separator was not 'rbc'"
-                    )
     
 
     def test_can_run_pipeline_with_genome_that_has_no_gzip(self):