From 8db63924402d61c597a70e1fec8d46373a0cf12c Mon Sep 17 00:00:00 2001 From: Charlotte Capitanchik Date: Fri, 8 Jul 2022 16:43:19 +0900 Subject: [PATCH 1/9] first try at umicollapse still running into issues with the docker image --- modules/local/UMICollapse/main.nf | 36 +++++++++++++++++++++++ modules/local/UMICollapse/meta.yml | 45 +++++++++++++++++++++++++++++ subworkflows/primaryclipanalysis.nf | 39 +++++++------------------ 3 files changed, 91 insertions(+), 29 deletions(-) create mode 100644 modules/local/UMICollapse/main.nf create mode 100644 modules/local/UMICollapse/meta.yml diff --git a/modules/local/UMICollapse/main.nf b/modules/local/UMICollapse/main.nf new file mode 100644 index 0000000..60452e6 --- /dev/null +++ b/modules/local/UMICollapse/main.nf @@ -0,0 +1,36 @@ +process UMICOLLAPSE { + tag "$meta.id" + label "process_high" + + container 'elly1502/umicollapse:latest' + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + ./umicollapse \\ + bam \\ + -i $bam \\ + -o ${prefix}.bam \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umicollapse: NA + END_VERSIONS + """ +} + diff --git a/modules/local/UMICollapse/meta.yml b/modules/local/UMICollapse/meta.yml new file mode 100644 index 0000000..fbd3918 --- /dev/null +++ b/modules/local/UMICollapse/meta.yml @@ -0,0 +1,45 @@ +name: umicollapse +description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. +keywords: + - umicollapse + - deduplication +tools: + - umicollapse: + description: > + UMICollapse contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs). + documentation: https://github.com/Daniel-Liu-c0deb0t/UMICollapse + license: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. + pattern: "*.{bai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Daniel-Liu-c0deb0t" + - "@CharlotteAnne" diff --git a/subworkflows/primaryclipanalysis.nf b/subworkflows/primaryclipanalysis.nf index fa9da2a..d26f1d8 100644 --- a/subworkflows/primaryclipanalysis.nf +++ b/subworkflows/primaryclipanalysis.nf @@ -7,9 +7,9 @@ include { BOWTIE_ALIGN } from '../modules/nf-core/modules/bowtie/align/main' include { STAR_ALIGN } from '../modules/nf-core/modules/star/align/main' include { DU } from '../modules/local/du/main' include { GET_UMI_LENGTH } from '../modules/local/get_umi_length/main' -include { UMITOOLS_DEDUP } from '../modules/local/umitools_dedup/main' +include { UMICOLLAPSE } from '../modules/local/UMICollapse/main' include { SAMTOOLS_INDEX as STAR_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main' -include { SAMTOOLS_INDEX as UMITOOLS_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main' +include { SAMTOOLS_INDEX as UMICOLLAPSE_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main' include { GET_CROSSLINKS } from '../modules/local/get_crosslinks/main' include { CROSSLINKS_COVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/coverage/main' include { CROSSLINKS_NORMCOVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/normcoverage/main' @@ -122,7 +122,7 @@ workflow PRIMARY_CLIP_ANALYSIS { false, "", "" ) - // Create a channel which outputs [reads_meta, transcript_txt] pairs +/* // Create a channel which outputs [reads_meta, transcript_txt] pairs reads.map{triplet -> [ triplet[0], file(triplet[2] + "/FIND_LONGEST_TRANSCRIPT/*.txt") ]}.set{ ch_longest_transcript } @@ -141,8 +141,8 @@ workflow PRIMARY_CLIP_ANALYSIS { // Filter transcripts FILTER_TRANSCRIPTS ( ch_filter_input.star, ch_filter_input.transcripts ) - - // Get TOME crosslinks + */ +/* // Get TOME crosslinks TOME_STAR_SAMTOOLS_INDEX ( FILTER_TRANSCRIPTS.out.filtered_bam ) FILTER_TRANSCRIPTS.out.filtered_bam.join(TOME_STAR_SAMTOOLS_INDEX.out.bai) .set{ tome_ch_umi_input } @@ -172,37 +172,16 @@ workflow PRIMARY_CLIP_ANALYSIS { }.set { ch_tome_input } TOME_GET_CROSSLINKS ( ch_tome_input.bam, ch_tome_input.transcript ) TOME_CROSSLINKS_COVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) - TOME_CROSSLINKS_NORMCOVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) + TOME_CROSSLINKS_NORMCOVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) */ // Get crosslinks STAR_SAMTOOLS_INDEX ( STAR_ALIGN.out.bam_sorted ) ch_umi_input = STAR_ALIGN.out.bam_sorted.combine(STAR_SAMTOOLS_INDEX.out.bai, by: 0) - // Determine if UMITools needs to be run in "low_memory" mode - DU ( ch_umi_input.map{it -> it[0, 1]} ) - GET_UMI_LENGTH ( ch_umi_input ) - ch_umi_input - .join( DU.out.size ) - .join( GET_UMI_LENGTH.out.length ) - .map( annotate_umitools_input ) - .set{ ch_umi_input_annotated } - - UMITOOLS_DEDUP ( ch_umi_input_annotated ) - - // Strip out the low_memory key from the meta value so that the later joins - // actually work - UMITOOLS_DEDUP.out.bam - .map{ it -> [it[0].findAll{key, val -> key != "low_memory"}, it[1]] } - .set{ ch_umitools_bam } - // Keep a channel for converting between meta with the low_memory key and - // without, in case in the future you want to keep track of which files were - // run as low mem - UMITOOLS_DEDUP.out.bam - .map{ it -> [it[0].findAll{key, val -> key != "low_memory"}, it[0]] } - .set{ ch_meta_conversion } + UMICOLLAPSE ( ch_umi_input ) - UMITOOLS_SAMTOOLS_INDEX ( ch_umitools_bam ) + /* UMICOLLAPSE_SAMTOOLS_INDEX ( ch_umitools_bam ) reads.map{triplet -> [ triplet[0], file(triplet[2] + "/SAMTOOLS_FAIDX/*.fa.fai") @@ -301,3 +280,5 @@ workflow PRIMARY_CLIP_ANALYSIS { paraclu_peaks = PARACLU_CONVERT.out.peaks clippy_peaks = CLIPPY.out.peaks } + */ +} \ No newline at end of file From f06ca3a7d9be333a8def7d9c23eec5a9da36ddf6 Mon Sep 17 00:00:00 2001 From: Charlotte Capitanchik Date: Fri, 8 Jul 2022 17:34:36 +0900 Subject: [PATCH 2/9] umicollapse working now --- modules/local/UMICollapse/main.nf | 3 ++- subworkflows/primaryclipanalysis.config | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/local/UMICollapse/main.nf b/modules/local/UMICollapse/main.nf index 60452e6..037c9c3 100644 --- a/modules/local/UMICollapse/main.nf +++ b/modules/local/UMICollapse/main.nf @@ -20,12 +20,13 @@ process UMICOLLAPSE { def prefix = task.ext.prefix ?: "${meta.id}" """ - ./umicollapse \\ + java -jar /UMICollapse/umicollapse.jar \\ bam \\ -i $bam \\ -o ${prefix}.bam \\ $args + mv .command.log ${prefix}_UMICollapse.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/primaryclipanalysis.config b/subworkflows/primaryclipanalysis.config index 3328ae1..ee39972 100644 --- a/subworkflows/primaryclipanalysis.config +++ b/subworkflows/primaryclipanalysis.config @@ -1,6 +1,6 @@ includeConfig '../conf/base.config' -def umi_separator = params.containsKey("umi_separator") ? params.umi_separator : "rbc" +def umi_separator = params.containsKey("umi_separator") ? params.umi_separator : "rbc:" process { withName: "TRIMGALORE" { @@ -15,8 +15,8 @@ process { ext.args = "--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --outFilterMultimapNmax 1 --outFilterMultimapScoreRange 1 --outSAMattributes All --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --outFilterType BySJout --alignIntronMin 20 --alignIntronMax 1000000 --outFilterScoreMin 10 --alignEndsType Extend5pOfRead1 --twopassMode Basic" } - withName: "UMITOOLS_DEDUP" { - ext.args = "--umi-separator='${umi_separator}:'" + withName: "UMICOLLAPSE" { + ext.args = "--umi-sep '${umi_separator}' --two-pass" } withName: "TOME_UMITOOLS_DEDUP" { From 314c592a4dcaa2a79396247ad484fb037c89decc Mon Sep 17 00:00:00 2001 From: Charlotte Capitanchik Date: Sat, 9 Jul 2022 15:02:48 +0900 Subject: [PATCH 3/9] remove umi tools --- modules/local/du/main.nf | 20 -------- modules/local/du/meta.yml | 25 ---------- modules/local/get_umi_length/main.nf | 33 ------------- modules/local/get_umi_length/meta.yml | 39 --------------- modules/local/umitools_dedup/main.nf | 41 ---------------- modules/local/umitools_dedup/meta.yml | 63 ------------------------- subworkflows/primaryclipanalysis.config | 8 +--- subworkflows/primaryclipanalysis.json | 2 +- 8 files changed, 3 insertions(+), 228 deletions(-) delete mode 100644 modules/local/du/main.nf delete mode 100644 modules/local/du/meta.yml delete mode 100644 modules/local/get_umi_length/main.nf delete mode 100644 modules/local/get_umi_length/meta.yml delete mode 100644 modules/local/umitools_dedup/main.nf delete mode 100644 modules/local/umitools_dedup/meta.yml diff --git a/modules/local/du/main.nf b/modules/local/du/main.nf deleted file mode 100644 index 5b0b825..0000000 --- a/modules/local/du/main.nf +++ /dev/null @@ -1,20 +0,0 @@ -process DU { - label "min_cores" - label "min_mem" - label "regular_queue" - - tag "$meta.id" - - container "biocontainers/biocontainers:v1.2.0_cv1" - - input: - tuple val(meta), path(input_file) - - output: - tuple val(meta), stdout, emit: size - - script: - """ - echo -n "\$(du -kL $input_file | awk '{print(\$1)}')" - """ -} diff --git a/modules/local/du/meta.yml b/modules/local/du/meta.yml deleted file mode 100644 index a4b95dc..0000000 --- a/modules/local/du/meta.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: du -description: Runs du to determine the size of the input file in KB -tools: - - du: - description: Estimates file space usage -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_file: - type: file - description: The file to determine the size of. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - size: - type: string - description: The size of the file in KB -authors: - - "@marc-jones" \ No newline at end of file diff --git a/modules/local/get_umi_length/main.nf b/modules/local/get_umi_length/main.nf deleted file mode 100644 index 7159d1d..0000000 --- a/modules/local/get_umi_length/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -process GET_UMI_LENGTH { - tag "$meta.id" - label 'process_low' - - container "quay.io/biocontainers/pysam:0.19.0--py39h5030a8b_0" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), stdout, emit: length - - script: - def umi_separator = task.ext.args ?: ":" - """ - #!/usr/bin/env python3 - - import pysam - import sys - - file_path = "$bam" - umi_separator = "$umi_separator" - - bam_file = pysam.AlignmentFile(file_path, "rb") - - max_umi_len = 0 - for read in bam_file.fetch(): - if umi_separator in read.query_name: - max_umi_len = max(max_umi_len, len(read.query_name.split(umi_separator)[-1])) - - sys.stdout.write(str(max_umi_len)) - """ -} diff --git a/modules/local/get_umi_length/meta.yml b/modules/local/get_umi_length/meta.yml deleted file mode 100644 index 5efdd47..0000000 --- a/modules/local/get_umi_length/meta.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: get_umi_length -description: Determines the UMI length used for aligned reads -tools: - - pysam: - description: | - Pysam is a python module for reading and manipulating files in the - SAM/BAM format. The SAM/BAM format is a way to store efficiently large - numbers of alignments, such as those routinely created by - next-generation sequencing methods. Pysam is a lightweight wrapper of - the samtools C-API. Pysam also includes an interface for tabix. - homepage: https://github.com/pysam-developers/pysam - documentation: https://pysam.readthedocs.io - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - size: - type: length - description: The size of the UMI -authors: - - "@marc-jones" \ No newline at end of file diff --git a/modules/local/umitools_dedup/main.nf b/modules/local/umitools_dedup/main.nf deleted file mode 100644 index 19694f9..0000000 --- a/modules/local/umitools_dedup/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process UMITOOLS_DEDUP { - tag "$meta.id" - label "process_high" - - conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : - 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "" : "--paired" - def low_mem = meta.low_memory ? "--method unique" : "" - """ - umi_tools \\ - dedup \\ - -I $bam \\ - -S ${prefix}.bam \\ - --log=${prefix}.log \\ - $paired \\ - $low_mem \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ -} diff --git a/modules/local/umitools_dedup/meta.yml b/modules/local/umitools_dedup/meta.yml deleted file mode 100644 index 3d3c642..0000000 --- a/modules/local/umitools_dedup/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: umitools_dedup -description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. -keywords: - - umitools - - deduplication -tools: - - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. - pattern: "*.{bam}" - - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. - pattern: "*.{bai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file with deduplicated UMIs. - pattern: "*.{bam}" - - tsv_edit_distance: - type: file - description: Reports the (binned) average edit distance between the UMIs at each position. - pattern: "*edit_distance.tsv" - - tsv_per_umi: - type: file - description: UMI-level summary statistics. - pattern: "*per_umi.tsv" - - tsv_umi_per_position: - type: file - description: Tabulates the counts for unique combinations of UMI and position. - pattern: "*per_position.tsv" - - log: - type: file - description: Log file - pattern: "*.log" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" - - "@klkeys" diff --git a/subworkflows/primaryclipanalysis.config b/subworkflows/primaryclipanalysis.config index ee39972..2fab21a 100644 --- a/subworkflows/primaryclipanalysis.config +++ b/subworkflows/primaryclipanalysis.config @@ -19,11 +19,7 @@ process { ext.args = "--umi-sep '${umi_separator}' --two-pass" } - withName: "TOME_UMITOOLS_DEDUP" { - ext.args = "--umi-separator='${umi_separator}:'" - } - - withName: "GET_UMI_LENGTH" { - ext.args = "${umi_separator}:" + withName: "TOME_UMICOLLAPSE" { + ext.args = "--umi-sep '${umi_separator}' --two-pass" } } \ No newline at end of file diff --git a/subworkflows/primaryclipanalysis.json b/subworkflows/primaryclipanalysis.json index 8e12b9e..123956e 100644 --- a/subworkflows/primaryclipanalysis.json +++ b/subworkflows/primaryclipanalysis.json @@ -31,7 +31,7 @@ "format": "text", "required": false, "default": "rbc", - "description": "Separator used in UMI-tools Dedup." + "description": "Separator used in UMICollapse." } } } From 423562caab8e388f76d184eb5657feca1a355802 Mon Sep 17 00:00:00 2001 From: Charlotte Capitanchik Date: Sat, 9 Jul 2022 15:09:01 +0900 Subject: [PATCH 4/9] fix remaining umicollapse integration --- subworkflows/primaryclipanalysis.nf | 53 +++++++---------------------- 1 file changed, 12 insertions(+), 41 deletions(-) diff --git a/subworkflows/primaryclipanalysis.nf b/subworkflows/primaryclipanalysis.nf index d26f1d8..986a59b 100644 --- a/subworkflows/primaryclipanalysis.nf +++ b/subworkflows/primaryclipanalysis.nf @@ -5,8 +5,6 @@ nextflow.enable.dsl=2 include { TRIMGALORE } from '../modules/nf-core/modules/trimgalore/main' include { BOWTIE_ALIGN } from '../modules/nf-core/modules/bowtie/align/main' include { STAR_ALIGN } from '../modules/nf-core/modules/star/align/main' -include { DU } from '../modules/local/du/main' -include { GET_UMI_LENGTH } from '../modules/local/get_umi_length/main' include { UMICOLLAPSE } from '../modules/local/UMICollapse/main' include { SAMTOOLS_INDEX as STAR_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main' include { SAMTOOLS_INDEX as UMICOLLAPSE_SAMTOOLS_INDEX} from '../modules/nf-core/modules/samtools/index/main' @@ -15,11 +13,9 @@ include { CROSSLINKS_COVERAGE } from '../modules/luslab/nf-core-modules/crosslin include { CROSSLINKS_NORMCOVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/normcoverage/main' include { FILTER_TRANSCRIPTS } from '../modules/local/filter_transcriptome_bam/main' -include { DU as TOME_DU } from '../modules/local/du/main' -include { GET_UMI_LENGTH as TOME_GET_UMI_LENGTH } from '../modules/local/get_umi_length/main' -include { UMITOOLS_DEDUP as TOME_UMITOOLS_DEDUP } from '../modules/local/umitools_dedup/main' +include { UMICOLLAPSE as TOME_UMICOLLAPSE } from '../modules/local/UMICollapse/main' include { SAMTOOLS_INDEX as TOME_STAR_SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main' -include { SAMTOOLS_INDEX as TOME_UMITOOLS_SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main' +include { SAMTOOLS_INDEX as TOME_UMICOLLAPSE_SAMTOOLS_INDEX } from '../modules/nf-core/modules/samtools/index/main' include { GET_CROSSLINKS as TOME_GET_CROSSLINKS } from '../modules/local/get_crosslinks/main' include { CROSSLINKS_COVERAGE as TOME_CROSSLINKS_COVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/coverage/main' include { CROSSLINKS_NORMCOVERAGE as TOME_CROSSLINKS_NORMCOVERAGE } from '../modules/luslab/nf-core-modules/crosslinks/normcoverage/main' @@ -34,15 +30,6 @@ include { PARACLU_PARACLU } from '../modules/luslab/nf-core-modules/paraclu/para include { PARACLU_CUT } from '../modules/luslab/nf-core-modules/paraclu/cut/main' include { PEKA } from '../modules/luslab/nf-core-modules/peka/main' -// Closure to annotate UMITools Input -annotate_umitools_input = { it -> - def meta = it[0].clone() - if (it[3].toInteger() >= params.max_kilobytes & - it[4].toInteger() >= params.max_umi_length) { - meta["low_memory"] = true - } - return [meta, it[1], it[2]] -} workflow { // If running straight from command line, will need to construct the @@ -122,7 +109,7 @@ workflow PRIMARY_CLIP_ANALYSIS { false, "", "" ) -/* // Create a channel which outputs [reads_meta, transcript_txt] pairs + // Create a channel which outputs [reads_meta, transcript_txt] pairs reads.map{triplet -> [ triplet[0], file(triplet[2] + "/FIND_LONGEST_TRANSCRIPT/*.txt") ]}.set{ ch_longest_transcript } @@ -141,30 +128,17 @@ workflow PRIMARY_CLIP_ANALYSIS { // Filter transcripts FILTER_TRANSCRIPTS ( ch_filter_input.star, ch_filter_input.transcripts ) - */ -/* // Get TOME crosslinks + + // Get TOME crosslinks TOME_STAR_SAMTOOLS_INDEX ( FILTER_TRANSCRIPTS.out.filtered_bam ) FILTER_TRANSCRIPTS.out.filtered_bam.join(TOME_STAR_SAMTOOLS_INDEX.out.bai) .set{ tome_ch_umi_input } - - // Determine if UMITools needs to be run in "low_memory" mode - TOME_DU ( tome_ch_umi_input.map{it -> it[0, 1]} ) - TOME_GET_UMI_LENGTH ( tome_ch_umi_input ) - tome_ch_umi_input - .join( TOME_DU.out.size ) - .join( TOME_GET_UMI_LENGTH.out.length ) - .map( annotate_umitools_input ) - .set{ tome_ch_umi_input_annotated } - - TOME_UMITOOLS_DEDUP ( tome_ch_umi_input_annotated ) - TOME_UMITOOLS_DEDUP.out.bam - .map{ it -> [it[0].findAll{key, val -> key != "low_memory"}, it[1]] } - .set{ ch_tome_umitools_bam } - TOME_UMITOOLS_SAMTOOLS_INDEX ( ch_tome_umitools_bam ) + TOME_UMICOLLAPSE ( tome_ch_umi_input) + TOME_UMICOLLAPSE_SAMTOOLS_INDEX ( TOME_UMICOLLAPSE.out.bam ) reads.map{triplet -> [ triplet[0], file(triplet[2] + "/FIND_LONGEST_TRANSCRIPT/*.fa.fai") ]}.set{ ch_longest_transcript_index } - tome_ch_xl_input = ch_tome_umitools_bam.join(TOME_UMITOOLS_SAMTOOLS_INDEX.out.bai) + tome_ch_xl_input = TOME_UMICOLLAPSE.out.bam.join(TOME_UMICOLLAPSE_SAMTOOLS_INDEX.out.bai) tome_ch_xl_input.join( ch_longest_transcript_index ).set{ tome_with_index } tome_with_index.multiMap { tuple -> bam: [tuple[0], tuple[1], tuple[2]] @@ -172,8 +146,7 @@ workflow PRIMARY_CLIP_ANALYSIS { }.set { ch_tome_input } TOME_GET_CROSSLINKS ( ch_tome_input.bam, ch_tome_input.transcript ) TOME_CROSSLINKS_COVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) - TOME_CROSSLINKS_NORMCOVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) */ - + TOME_CROSSLINKS_NORMCOVERAGE ( TOME_GET_CROSSLINKS.out.crosslinkBed ) // Get crosslinks STAR_SAMTOOLS_INDEX ( STAR_ALIGN.out.bam_sorted ) @@ -181,13 +154,13 @@ workflow PRIMARY_CLIP_ANALYSIS { UMICOLLAPSE ( ch_umi_input ) - /* UMICOLLAPSE_SAMTOOLS_INDEX ( ch_umitools_bam ) + UMICOLLAPSE_SAMTOOLS_INDEX ( UMICOLLAPSE.out.bam ) reads.map{triplet -> [ triplet[0], file(triplet[2] + "/SAMTOOLS_FAIDX/*.fa.fai") ]}.set{ ch_genome_fai } - ch_xl_input = ch_umitools_bam.join(UMITOOLS_SAMTOOLS_INDEX.out.bai) + ch_xl_input = UMICOLLAPSE.out.bam.join(UMICOLLAPSE_SAMTOOLS_INDEX.out.bai) ch_xl_input.join( ch_genome_fai ).set{ ch_with_index } @@ -274,11 +247,9 @@ workflow PRIMARY_CLIP_ANALYSIS { trimgalore_log = TRIMGALORE.out.log bowtie_align_log = BOWTIE_ALIGN.out.log star_align_log_final = STAR_ALIGN.out.log_final - umitools_dedup_log = UMITOOLS_DEDUP.out.log + umicollapse_log = UMICOLLAPSE.out.log crosslinks = GET_CROSSLINKS.out.crosslinkBed icount_peaks = ICOUNT_PEAKS.out.peaks paraclu_peaks = PARACLU_CONVERT.out.peaks clippy_peaks = CLIPPY.out.peaks -} - */ } \ No newline at end of file From 1f3a0c27bcd83e4592381d12654da02369db5def Mon Sep 17 00:00:00 2001 From: Charlotte Capitanchik Date: Tue, 12 Jul 2022 11:47:55 +0900 Subject: [PATCH 5/9] fix clipqc links --- subworkflows/clipqualitycheck.nf | 4 ++-- workflows/demuxandanalyse.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/clipqualitycheck.nf b/subworkflows/clipqualitycheck.nf index b1eb271..de6e5ae 100644 --- a/subworkflows/clipqualitycheck.nf +++ b/subworkflows/clipqualitycheck.nf @@ -15,7 +15,7 @@ workflow CLIP_QUALITY_CHECK { trimgalore_log bowtie_align_log star_align_log_final - umitools_dedup_log + umicollapse_log crosslinks icount_peaks paraclu_peaks @@ -26,7 +26,7 @@ workflow CLIP_QUALITY_CHECK { CLIPQC ( bowtie_align_log.map(strip_meta).collect(), star_align_log_final.map(strip_meta).collect(), - umitools_dedup_log.map(strip_meta).collect(), + umicollapse_log.map(strip_meta).collect(), crosslinks.map(strip_meta).collect(), icount_peaks.map(strip_meta).collect(), paraclu_peaks.map(strip_meta).collect(), diff --git a/workflows/demuxandanalyse.nf b/workflows/demuxandanalyse.nf index 247aed1..cb0b983 100644 --- a/workflows/demuxandanalyse.nf +++ b/workflows/demuxandanalyse.nf @@ -40,7 +40,7 @@ workflow { PRIMARY_CLIP_ANALYSIS.out.trimgalore_log, PRIMARY_CLIP_ANALYSIS.out.bowtie_align_log, PRIMARY_CLIP_ANALYSIS.out.star_align_log_final, - PRIMARY_CLIP_ANALYSIS.out.umitools_dedup_log, + PRIMARY_CLIP_ANALYSIS.out.umicollapse_log, PRIMARY_CLIP_ANALYSIS.out.crosslinks, PRIMARY_CLIP_ANALYSIS.out.icount_peaks, PRIMARY_CLIP_ANALYSIS.out.paraclu_peaks, From 775e700e5d5191be848efafc2890c72fbf838f98 Mon Sep 17 00:00:00 2001 From: Charlotte Capitanchik Date: Tue, 12 Jul 2022 12:22:53 +0900 Subject: [PATCH 6/9] try to fix failing test --- tests/test_primary_clip_analysis_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_primary_clip_analysis_pipeline.py b/tests/test_primary_clip_analysis_pipeline.py index 5ba17c7..25b521c 100644 --- a/tests/test_primary_clip_analysis_pipeline.py +++ b/tests/test_primary_clip_analysis_pipeline.py @@ -22,14 +22,14 @@ def test_can_run_pipeline_with_genome_that_has_gzip(self): # Default UMI Separator is rbc for proc in execution.process_executions: - if "UMITOOLS_DEDUP" in proc.process: + if "UMICOLLAPSE" in proc.process: p1, p2 = proc.hash.split("/") subdirs = os.listdir(os.path.join("work", p1)) subdir = [d for d in subdirs if d.startswith(p2)][0] meta_id = proc.name[proc.name.find("(") + 1 : proc.name.find(")")] with open(os.path.join("work", p1, subdir, "{}.log".format(meta_id))) as f: self.assertIn( - "--umi-separator=rbc:", f.read(), + "--umi-sep rbc:", f.read(), "Default umi-separator was not 'rbc'" ) From ff64ee64d3ddc86e031ba0aa84e045da4336f9fa Mon Sep 17 00:00:00 2001 From: Charlotte Capitanchik Date: Wed, 3 Aug 2022 16:14:33 +0900 Subject: [PATCH 7/9] fix multiqc for dedupe --- modules/local/clipqc/templates/clipqc.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/local/clipqc/templates/clipqc.py b/modules/local/clipqc/templates/clipqc.py index c1c2caa..c759d45 100755 --- a/modules/local/clipqc/templates/clipqc.py +++ b/modules/local/clipqc/templates/clipqc.py @@ -89,18 +89,19 @@ with open(dedup_log, 'r') as logfile: - exp = re.sub('.log', '', os.path.basename(dedup_log)) + exp = re.sub('_UMICollapse.log', '', os.path.basename(dedup_log)) lines = logfile.readlines() - input_reads = [i for i in lines if 'INFO Reads: Input Reads:' in i] + input_reads = [i for i in lines if 'Number of input reads' in i] input_reads = int(re.findall(r'\\d+', input_reads[0])[-1]) - output_reads = [i for i in lines if 'Number of reads out:' in i] + output_reads = [i for i in lines if 'Number of reads after deduplicating' in i] output_reads = int(re.findall(r'\\d+', output_reads[0])[-1]) - mean_umis = [i for i in lines if 'Mean number of unique UMIs per position:' in i] - mean_umis = float(re.findall(r'\\d+', mean_umis[0])[-1]) + mean_umis = [i for i in lines if 'Average number of UMIs per alignment position' in i] + mean_umis = float(re.findall(r'\\d+\\.*\\d*', mean_umis[0])[-1]) + mean_umis = np.round(mean_umis, 2) dedup['exp'].append(exp) dedup['input_reads'].append(input_reads) From ad556605750e47d02607a16565ab98dbccff80ef Mon Sep 17 00:00:00 2001 From: Sam Ireland Date: Wed, 3 Aug 2022 23:59:44 +0100 Subject: [PATCH 8/9] Fix process count in tests --- tests/test_primary_clip_analysis_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_primary_clip_analysis_pipeline.py b/tests/test_primary_clip_analysis_pipeline.py index 25b521c..4cf7dd3 100644 --- a/tests/test_primary_clip_analysis_pipeline.py +++ b/tests/test_primary_clip_analysis_pipeline.py @@ -18,7 +18,7 @@ def test_can_run_pipeline_with_genome_that_has_gzip(self): "Hs_genome": "assets/human_genome", }, profile=["iMaps", "local", "test"]) self.assertEqual(execution.status, "OK", msg=execution.stdout) - self.assertEqual(len(execution.process_executions), 29) + self.assertEqual(len(execution.process_executions), 25) # Default UMI Separator is rbc for proc in execution.process_executions: @@ -49,7 +49,7 @@ def test_can_run_pipeline_with_genome_that_has_no_gzip(self): "Hs_genome": "assets/human_genome_no_gzip", }, profile=["iMaps", "local", "test"]) self.assertEqual(execution.status, "OK", msg=execution.stdout) - self.assertEqual(len(execution.process_executions), 29) + self.assertEqual(len(execution.process_executions), 25) finally: if os.path.exists("assets/human_genome_no_gzip"): shutil.rmtree("assets/human_genome_no_gzip") From 5bb2e32a0833130aeab0f60dbdec2ce4a4c71be4 Mon Sep 17 00:00:00 2001 From: Sam Ireland Date: Thu, 4 Aug 2022 00:20:06 +0100 Subject: [PATCH 9/9] Remove param check --- tests/test_primary_clip_analysis_pipeline.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/test_primary_clip_analysis_pipeline.py b/tests/test_primary_clip_analysis_pipeline.py index 4cf7dd3..8c0e6a1 100644 --- a/tests/test_primary_clip_analysis_pipeline.py +++ b/tests/test_primary_clip_analysis_pipeline.py @@ -19,19 +19,6 @@ def test_can_run_pipeline_with_genome_that_has_gzip(self): }, profile=["iMaps", "local", "test"]) self.assertEqual(execution.status, "OK", msg=execution.stdout) self.assertEqual(len(execution.process_executions), 25) - - # Default UMI Separator is rbc - for proc in execution.process_executions: - if "UMICOLLAPSE" in proc.process: - p1, p2 = proc.hash.split("/") - subdirs = os.listdir(os.path.join("work", p1)) - subdir = [d for d in subdirs if d.startswith(p2)][0] - meta_id = proc.name[proc.name.find("(") + 1 : proc.name.find(")")] - with open(os.path.join("work", p1, subdir, "{}.log".format(meta_id))) as f: - self.assertIn( - "--umi-sep rbc:", f.read(), - "Default umi-separator was not 'rbc'" - ) def test_can_run_pipeline_with_genome_that_has_no_gzip(self):