From 8e74fbb7ea1a98b7d936f2d3853c4d93a61a02f8 Mon Sep 17 00:00:00 2001 From: Anob Chakrabarti Date: Fri, 18 Apr 2025 12:03:52 +0100 Subject: [PATCH 1/6] Added BEDPE exporting --- modules/getvisualisations.nf | 40 ++++++++++++++++++++++++++++++++++ workflows/getvisualisations.nf | 9 ++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/modules/getvisualisations.nf b/modules/getvisualisations.nf index dd61b33..5fc641f 100644 --- a/modules/getvisualisations.nf +++ b/modules/getvisualisations.nf @@ -102,4 +102,44 @@ process GET_ARCS { get_arcs.R --clusters $clusters --genes $genes --breaks $breaks --output ${sample_id} """ +} + +process EXPORT_BEDPE { + + tag "${sample_id}" + label 'process_low' + + publishDir "${params.outdir}/igv", mode: 'copy', overwrite: true + + input: + val(type) + tuple val(sample_id), path(hybrids) + + output: + tuple val(sample_id), path("${sample_id}.${type}.bedpe.gz"), emit: bedpe + + script: + """ + #!/usr/bin/env Rscript + + suppressPackageStartupMessages(library(data.table)) + suppressPackageStartupMessages(library(toscatools)) + + hybrids.dt <- fread("$hybrids") + hybrids.dt <- toscatools::reorient_hybrids(hybrids.dt) + + if($type == "hybrids") { + bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "total_count", "L_strand", "R_strand") + bedpe.dt <- hybrids.dt[, ..bedpe.colnames] + } else if($type == "clusters") { + bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "cluster_hybrid_count", "L_strand", "R_strand") + bedpe.dt <- hybrids.dt[, ..bedpe.colnames] + } + + bedpe.dt[, `:=` (L_start = L_start - 1, + R_start = R_start - 1)] + + fwrite(bedpe.dt, "${sample_id}.${type}.bedpe.gz", sep = "\t", col.names = FALSE, quote = FALSE) + """ + } \ No newline at end of file diff --git a/workflows/getvisualisations.nf b/workflows/getvisualisations.nf index 4cec2c9..4318bb3 100644 --- a/workflows/getvisualisations.nf +++ b/workflows/getvisualisations.nf @@ -3,9 +3,11 @@ // Specify DSL2 nextflow.enable.dsl=2 -include { EXPORT_GENOMIC_BED as EXPORT_HYBRID_GENOMIC_BED; +include { EXPORT_GENOMIC_BED as EXPORT_HYBRIDS_GENOMIC_BED; EXPORT_GENOMIC_BED as EXPORT_CLUSTERS_GENOMIC_BED; EXPORT_GENOMIC_BAM; + EXPORT_BEDPE as EXPORT_HYBRIDS_BEDPE; + EXPORT_BEDPE as EXPORT_CLUSTERS_BEDPE; GET_CONTACT_MAPS; GET_ARCS } from '../modules/getvisualisations.nf' @@ -20,10 +22,13 @@ workflow GET_VISUALISATIONS { goi main: - EXPORT_HYBRID_GENOMIC_BED("hybrids", hybrids) + EXPORT_HYBRIDS_GENOMIC_BED("hybrids", hybrids) EXPORT_GENOMIC_BAM(EXPORT_HYBRID_GENOMIC_BED.out.bed, genome_fai.collect()) EXPORT_CLUSTERS_GENOMIC_BED("clusters", clusters) + EXPORT_HYBRIDS_BEDPE("hybrids", hybrids) + EXPORT_CLUSTERS_BEDPE("clusters", clusters) + if(params.goi) { GET_CONTACT_MAPS(hybrids, transcript_fai.collect(), goi.collect()) From 9a5f7a8adb298b0437f75f59183073b56204f04b Mon Sep 17 00:00:00 2001 From: Anob Chakrabarti Date: Fri, 18 Apr 2025 12:07:01 +0100 Subject: [PATCH 2/6] Added missing S to hybrids --- workflows/getvisualisations.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/getvisualisations.nf b/workflows/getvisualisations.nf index 4318bb3..e0d8d21 100644 --- a/workflows/getvisualisations.nf +++ b/workflows/getvisualisations.nf @@ -23,7 +23,7 @@ workflow GET_VISUALISATIONS { main: EXPORT_HYBRIDS_GENOMIC_BED("hybrids", hybrids) - EXPORT_GENOMIC_BAM(EXPORT_HYBRID_GENOMIC_BED.out.bed, genome_fai.collect()) + EXPORT_GENOMIC_BAM(EXPORT_HYBRIDS_GENOMIC_BED.out.bed, genome_fai.collect()) EXPORT_CLUSTERS_GENOMIC_BED("clusters", clusters) EXPORT_HYBRIDS_BEDPE("hybrids", hybrids) From d4e40e69d400778b478521f9fc077c2f4edb6227 Mon Sep 17 00:00:00 2001 From: Anob Chakrabarti Date: Fri, 18 Apr 2025 12:09:13 +0100 Subject: [PATCH 3/6] Missing quote --- modules/getvisualisations.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/getvisualisations.nf b/modules/getvisualisations.nf index 5fc641f..1464617 100644 --- a/modules/getvisualisations.nf +++ b/modules/getvisualisations.nf @@ -128,7 +128,7 @@ process EXPORT_BEDPE { hybrids.dt <- fread("$hybrids") hybrids.dt <- toscatools::reorient_hybrids(hybrids.dt) - if($type == "hybrids") { + if("$type" == "hybrids") { bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "total_count", "L_strand", "R_strand") bedpe.dt <- hybrids.dt[, ..bedpe.colnames] } else if($type == "clusters") { From 60331b223854eb85c4aa1da2374011a343f2676f Mon Sep 17 00:00:00 2001 From: Anob Chakrabarti Date: Fri, 18 Apr 2025 12:09:33 +0100 Subject: [PATCH 4/6] Missing quote --- modules/getvisualisations.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/getvisualisations.nf b/modules/getvisualisations.nf index 1464617..e3b2ec2 100644 --- a/modules/getvisualisations.nf +++ b/modules/getvisualisations.nf @@ -131,7 +131,7 @@ process EXPORT_BEDPE { if("$type" == "hybrids") { bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "total_count", "L_strand", "R_strand") bedpe.dt <- hybrids.dt[, ..bedpe.colnames] - } else if($type == "clusters") { + } else if("$type" == "clusters") { bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "cluster_hybrid_count", "L_strand", "R_strand") bedpe.dt <- hybrids.dt[, ..bedpe.colnames] } From 963c53a61b885f80555e53380ac696cc2f357eaa Mon Sep 17 00:00:00 2001 From: Anob Chakrabarti Date: Fri, 18 Apr 2025 12:11:08 +0100 Subject: [PATCH 5/6] Corrected cluster count column name --- modules/getvisualisations.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/getvisualisations.nf b/modules/getvisualisations.nf index e3b2ec2..65d3f73 100644 --- a/modules/getvisualisations.nf +++ b/modules/getvisualisations.nf @@ -132,7 +132,7 @@ process EXPORT_BEDPE { bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "total_count", "L_strand", "R_strand") bedpe.dt <- hybrids.dt[, ..bedpe.colnames] } else if("$type" == "clusters") { - bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "cluster_hybrid_count", "L_strand", "R_strand") + bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "cluster_count", "L_strand", "R_strand") bedpe.dt <- hybrids.dt[, ..bedpe.colnames] } From 191fe600a4a6711f0f7f7f2ce0c01082908506ac Mon Sep 17 00:00:00 2001 From: Anob Chakrabarti Date: Fri, 18 Apr 2025 12:15:47 +0100 Subject: [PATCH 6/6] corrected cluster count colname --- modules/getvisualisations.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/getvisualisations.nf b/modules/getvisualisations.nf index 65d3f73..44bc0c9 100644 --- a/modules/getvisualisations.nf +++ b/modules/getvisualisations.nf @@ -132,7 +132,7 @@ process EXPORT_BEDPE { bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "total_count", "L_strand", "R_strand") bedpe.dt <- hybrids.dt[, ..bedpe.colnames] } else if("$type" == "clusters") { - bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "cluster_count", "L_strand", "R_strand") + bedpe.colnames <- c("L_seqnames", "L_start", "L_end", "R_seqnames", "R_start", "R_end", "name", "count", "L_strand", "R_strand") bedpe.dt <- hybrids.dt[, ..bedpe.colnames] }