From aa8d52741bf4ce8393810252e14e68b878f5d025 Mon Sep 17 00:00:00 2001 From: Charlotte Capitanchik Date: Tue, 8 Sep 2020 17:42:38 +0100 Subject: [PATCH] Error in demultiplex.py with 3' barcode The "Handle the case where randomer is already in the header." branch did not cover the case where the nucleotide right after ":rbc:" is an N, which it often is, so the header ended up containing ":rbc:" twice. --- iCount/demultiplex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iCount/demultiplex.py b/iCount/demultiplex.py index 7ef08bb..897b6b7 100644 --- a/iCount/demultiplex.py +++ b/iCount/demultiplex.py @@ -175,13 +175,13 @@ def _extract(reads, barcodes, **kwargs): def add_randomer_to_header(randomer, fq_entry): """Add randomer info to FASTQ header.""" - match = re.match(r'.*(:rbc:)([ACGT]+).*', fq_entry.id) + match = re.match(r'.*(:rbc:)([ACGTN]+).*', fq_entry.id) if match: # Handle the case where randomer is already in the header. rbc = match.group(2) randomer = randomer + rbc - fq_entry.id = re.sub(r':rbc:[ACGT]+', '', fq_entry.id) + fq_entry.id = re.sub(r':rbc:[ACGTN]+', '', fq_entry.id) if fq_entry.id[-2:] in ['/1', '/2']: # For early versions of Illumina, keep mate info at the end: