From a0ee101de7613040f5b3d72fe9e3eb899dab94b3 Mon Sep 17 00:00:00 2001 From: niamh Date: Tue, 24 Feb 2026 10:33:38 +0000 Subject: [PATCH 1/5] fixing headings --- scripts/filter_sample_list.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py index e4486d9..b938447 100644 --- a/scripts/filter_sample_list.py +++ b/scripts/filter_sample_list.py @@ -46,8 +46,8 @@ #Get columns we need from sample sheet sample_id = line[0] - worksheet = line[2] - sample_type = line[7] + worksheet = line[1] + sample_type = line[8] description = line[9] #Append Sample ID (first element in list) to sample list From d9c3169cfb7e725af3e765ead57b77680f8a78a1 Mon Sep 17 00:00:00 2001 From: niamh Date: Tue, 24 Feb 2026 10:39:42 +0000 Subject: [PATCH 2/5] making additional csvs for additional referrals column --- scripts/filter_sample_list.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py index b938447..b65f6d3 100644 --- a/scripts/filter_sample_list.py +++ b/scripts/filter_sample_list.py @@ -53,9 +53,16 @@ #Append Sample ID (first element in list) to sample list samplelist.write(sample_id+"\n") + # Split the decription column up, as now additional referrals section + desc_parts = description.split(";") + desc_dict = {} + for part in desc_parts: + if "=" in part: + key, value = part.split("=", 1) + desc_dict[key] = value + #Get referral from Description (tenth element in list), split by ; and get third element - referral = description.split(";")[2] - referral = referral.split("=")[1] + referral = desc_dict.get("referral", "null") #if RNA, update referral based on dictionary if sample_type == "RNA" and (referral in referral_dict): @@ -83,7 +90,18 @@ samplesaml.write(sample_id+",myeloid\n") samplesaml.close() - + + # Get additional referrals to make one csv per referral + additional_referrals = desc_dict.get("additional_referrals", "") + if additional_referrals: + for add_ref in additional_referrals.split(","): + add_ref = add_ref.strip() + if add_ref: + add_ref_file = open( + "samples_additional_referral_"+add_ref+"_"+worksheet+"_"+sample_type+".csv", 'a' + ) + add_ref_file.write(sample_id+","+add_ref+"\n") + add_ref_file.close() #Write out worksheets to file with open('worksheets_dna.txt','w') as f: From ce92515d0865e95f37c18133b12a3d981ae2a93f Mon Sep 17 00:00:00 2001 From: niamh Date: Mon, 2 Mar 2026 08:40:19 +0000 Subject: [PATCH 3/5] changing samplesheet headers from position to name --- scripts/filter_sample_list.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py index b65f6d3..45456e4 100644 --- a/scripts/filter_sample_list.py +++ b/scripts/filter_sample_list.py @@ -35,7 +35,7 @@ line = line.strip() #Skip if header line - if line.startswith('Sample'): + if line.startswith("Sample"): next @@ -44,11 +44,12 @@ #Split line into list line = line.split(",") + #Get columns we need from sample sheet - sample_id = line[0] - worksheet = line[1] - sample_type = line[8] - description = line[9] + sample_id = line["Sample_ID"] + worksheet = line["Sample_Plate"] + sample_type = line["Sample_Type"] + description = line["Description"] #Append Sample ID (first element in list) to sample list samplelist.write(sample_id+"\n") From e89c7e597c5f2fbda83ba70b6bcab0665be7fd19 Mon Sep 17 00:00:00 2001 From: niamh Date: Mon, 2 Mar 2026 09:22:00 +0000 Subject: [PATCH 4/5] changing samplesheet headers from position to name --- scripts/filter_sample_list.py | 124 ++++++++++++++++------------------ 1 file changed, 57 insertions(+), 67 deletions(-) diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py index 45456e4..b3356ec 100644 --- a/scripts/filter_sample_list.py +++ b/scripts/filter_sample_list.py @@ -3,6 +3,7 @@ include dictionary to translate referral types """ +import csv #Open sample sheet samplesheet = open('SampleSheet_updated.csv','r') @@ -28,81 +29,70 @@ dna = set() rna = set() +#Get column by header name instead of header position. +read_samplesheet = csv.DictReader(samplesheet) + #Go through samplesheet until you hit the header lines -for line in samplesheet: +for line in read_samplesheet: + + #Get columns we need from sample sheet + sample_id = line["Sample_ID"] + worksheet = line["Sample_Plate"] + sample_type = line["Sample_Type"] + description = line["Description"] + + #Append Sample ID (first element in list) to sample list + samplelist.write(sample_id+"\n") + + # Split the decription column up, as now additional referrals section + desc_parts = description.split(";") + desc_dict = {} + for part in desc_parts: + if "=" in part: + key, value = part.split("=", 1) + desc_dict[key] = value + + #Get referral from Description (tenth element in list), split by ; and get third element + referral = desc_dict.get("referral", "null") + + #if RNA, update referral based on dictionary + if sample_type == "RNA" and (referral in referral_dict): + + referral = referral_dict[referral] - #Remove new line character - line = line.strip() + #Add worksheet to set + if sample_type == "DNA": + dna.add(worksheet) - #Skip if header line - if line.startswith("Sample"): - - next + elif sample_type == "RNA": + rna.add(worksheet) - else: - - #Split line into list - line = line.split(",") + #Write to samples correct order + samplescorrect = open('samples_correct_order_'+worksheet+"_"+sample_type+".csv",'a') - - #Get columns we need from sample sheet - sample_id = line["Sample_ID"] - worksheet = line["Sample_Plate"] - sample_type = line["Sample_Type"] - description = line["Description"] - - #Append Sample ID (first element in list) to sample list - samplelist.write(sample_id+"\n") - - # Split the decription column up, as now additional referrals section - desc_parts = description.split(";") - desc_dict = {} - for part in desc_parts: - if "=" in part: - key, value = part.split("=", 1) - desc_dict[key] = value - - #Get referral from Description (tenth element in list), split by ; and get third element - referral = desc_dict.get("referral", "null") - - #if RNA, update referral based on dictionary - if sample_type == "RNA" and (referral in referral_dict): + samplescorrect.write(sample_id+","+worksheet+","+sample_type+","+referral+"\n") - referral = referral_dict[referral] - - #Add worksheet to set - if sample_type == "DNA": - dna.add(worksheet) - - elif sample_type == "RNA": - rna.add(worksheet) - - #Write to samples correct order - samplescorrect = open('samples_correct_order_'+worksheet+"_"+sample_type+".csv",'a') + samplescorrect.close() - samplescorrect.write(sample_id+","+worksheet+","+sample_type+","+referral+"\n") + #Write any aml referral samples to additional csv + if referral == "aml": + samplesaml = open("samples_aml_to_myeloid_"+worksheet+"_"+sample_type+".csv",'a') - samplescorrect.close() - - #Write any aml referral samples to additional csv - if referral == "aml": - samplesaml = open("samples_aml_to_myeloid_"+worksheet+"_"+sample_type+".csv",'a') - - samplesaml.write(sample_id+",myeloid\n") - - samplesaml.close() - - # Get additional referrals to make one csv per referral - additional_referrals = desc_dict.get("additional_referrals", "") - if additional_referrals: - for add_ref in additional_referrals.split(","): - add_ref = add_ref.strip() - if add_ref: - add_ref_file = open( - "samples_additional_referral_"+add_ref+"_"+worksheet+"_"+sample_type+".csv", 'a' - ) - add_ref_file.write(sample_id+","+add_ref+"\n") - add_ref_file.close() + samplesaml.write(sample_id+",myeloid\n") + + samplesaml.close() + + # Get additional referrals to make one csv per referral + additional_referrals = desc_dict.get("additional_referrals", "") + if additional_referrals: + for add_ref in additional_referrals.split(","): + add_ref = add_ref.strip() + if add_ref: + add_ref_file = open( + "samples_additional_referral_"+add_ref+"_"+worksheet+"_"+sample_type+".csv", 'a' + ) + add_ref_file.write(sample_id+","+add_ref+"\n") + add_ref_file.close() #Write out worksheets to file with open('worksheets_dna.txt','w') as f: From 56c0507f991851fad213e1dd1dd246310238570a Mon Sep 17 00:00:00 2001 From: niamh Date: Mon, 2 Mar 2026 09:22:45 +0000 Subject: [PATCH 5/5] working with new column names --- 1_TSO500.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/1_TSO500.sh b/1_TSO500.sh index b18cff8..4d1d9bd 100755 --- a/1_TSO500.sh +++ b/1_TSO500.sh @@ -73,7 +73,7 @@ set -u cp "$raw_data"/SampleSheet.csv . # remove header from samplesheet -sed -n -e '/Sample_ID,Sample_Name/,$p' SampleSheet.csv >> SampleSheet_updated.csv +sed -n -e '/Sample_ID,Sample_Plate/,$p' SampleSheet.csv >> SampleSheet_updated.csv # make a list of samples and get correct order of samples for each worksheet python "$pipeline_scripts"/filter_sample_list.py