From a0ee101de7613040f5b3d72fe9e3eb899dab94b3 Mon Sep 17 00:00:00 2001
From: niamh <niamh.teague@wales.nhs.uk>
Date: Tue, 24 Feb 2026 10:33:38 +0000
Subject: [PATCH 1/5] fixing headings

---
 scripts/filter_sample_list.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py
index e4486d9..b938447 100644
--- a/scripts/filter_sample_list.py
+++ b/scripts/filter_sample_list.py
@@ -46,8 +46,8 @@
 
 		#Get columns we need from sample sheet
 		sample_id = line[0]
-		worksheet = line[2]
-		sample_type = line[7]
+		worksheet = line[1]
+		sample_type = line[8]
 		description = line[9]
 
 		#Append Sample ID (first element in list) to sample list

From d9c3169cfb7e725af3e765ead57b77680f8a78a1 Mon Sep 17 00:00:00 2001
From: niamh <niamh.teague@wales.nhs.uk>
Date: Tue, 24 Feb 2026 10:39:42 +0000
Subject: [PATCH 2/5] making additional csvs for additional referrals column

---
 scripts/filter_sample_list.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py
index b938447..b65f6d3 100644
--- a/scripts/filter_sample_list.py
+++ b/scripts/filter_sample_list.py
@@ -53,9 +53,16 @@
 		#Append Sample ID (first element in list) to sample list
 		samplelist.write(sample_id+"\n")
 
+		# Split the decription column up, as now additional referrals section
+		desc_parts = description.split(";")
+		desc_dict = {}
+		for part in desc_parts:
+			if "=" in part:
+				key, value = part.split("=", 1)
+				desc_dict[key] = value
+
 		#Get referral from Description (tenth element in list), split by ; and get third element
-		referral = description.split(";")[2]
-		referral = referral.split("=")[1]
+		referral = desc_dict.get("referral", "null")
 
 		#if RNA, update referral based on dictionary
 		if sample_type == "RNA" and (referral in referral_dict):
@@ -83,7 +90,18 @@
 			samplesaml.write(sample_id+",myeloid\n")
 
 			samplesaml.close()
-			
+
+		# Get additional referrals to make one csv per referral
+		additional_referrals = desc_dict.get("additional_referrals", "")
+		if additional_referrals:
+			for add_ref in additional_referrals.split(","):
+				add_ref = add_ref.strip()
+				if add_ref:
+					add_ref_file = open(
+						"samples_additional_referral_"+add_ref+"_"+worksheet+"_"+sample_type+".csv", 'a'	
+					)
+					add_ref_file.write(sample_id+","+add_ref+"\n")
+					add_ref_file.close()
 
 #Write out worksheets to file
 with open('worksheets_dna.txt','w') as f:

From ce92515d0865e95f37c18133b12a3d981ae2a93f Mon Sep 17 00:00:00 2001
From: niamh <niamh.teague@wales.nhs.uk>
Date: Mon, 2 Mar 2026 08:40:19 +0000
Subject: [PATCH 3/5] changing samplesheet headers from position to name

---
 scripts/filter_sample_list.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py
index b65f6d3..45456e4 100644
--- a/scripts/filter_sample_list.py
+++ b/scripts/filter_sample_list.py
@@ -35,7 +35,7 @@
 	line = line.strip()
 
 	#Skip if header line
-	if line.startswith('Sample'):
+	if line.startswith("Sample"):
 		
 		next
 
@@ -44,11 +44,12 @@
 		#Split line into list
 		line = line.split(",")
 
+		
 		#Get columns we need from sample sheet
-		sample_id = line[0]
-		worksheet = line[1]
-		sample_type = line[8]
-		description = line[9]
+		sample_id = line["Sample_ID"]
+		worksheet = line["Sample_Plate"]
+		sample_type = line["Sample_Type"]
+		description = line["Description"]
 
 		#Append Sample ID (first element in list) to sample list
 		samplelist.write(sample_id+"\n")

From e89c7e597c5f2fbda83ba70b6bcab0665be7fd19 Mon Sep 17 00:00:00 2001
From: niamh <niamh.teague@wales.nhs.uk>
Date: Mon, 2 Mar 2026 09:22:00 +0000
Subject: [PATCH 4/5] read samplesheet with csv.DictReader so columns are accessed by header name

---
 scripts/filter_sample_list.py | 124 ++++++++++++++++------------------
 1 file changed, 57 insertions(+), 67 deletions(-)

diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py
index 45456e4..b3356ec 100644
--- a/scripts/filter_sample_list.py
+++ b/scripts/filter_sample_list.py
@@ -3,6 +3,7 @@
 include dictionary to translate referral types
 
 """
+import csv
 
 #Open sample sheet
 samplesheet = open('SampleSheet_updated.csv','r')
@@ -28,81 +29,70 @@
 dna = set()
 rna = set()
 
+#Get column by header name instead of header position.
+read_samplesheet = csv.DictReader(samplesheet)
+
 #Go through samplesheet until you hit the header lines
-for line in samplesheet:
+for line in read_samplesheet:
+	
+	#Get columns we need from sample sheet
+	sample_id = line["Sample_ID"]
+	worksheet = line["Sample_Plate"]
+	sample_type = line["Sample_Type"]
+	description = line["Description"]
+
+	#Append Sample ID (Sample_ID column) to sample list
+	samplelist.write(sample_id+"\n")
+
+	# Split the description column into key=value pairs, as it now also holds an additional referrals section
+	desc_parts = description.split(";")
+	desc_dict = {}
+	for part in desc_parts:
+		if "=" in part:
+			key, value = part.split("=", 1)
+			desc_dict[key] = value
+
+	#Get referral from the parsed Description key=value pairs, defaulting to "null" if absent
+	referral = desc_dict.get("referral", "null")
+
+	#if RNA, update referral based on dictionary
+	if sample_type == "RNA" and (referral in referral_dict):
+
+		referral = referral_dict[referral]
 	
-	#Remove new line character
-	line = line.strip()
+	#Add worksheet to set
+	if sample_type == "DNA":
+		dna.add(worksheet)
 
-	#Skip if header line
-	if line.startswith("Sample"):
-		
-		next
+	elif sample_type == "RNA":
+		rna.add(worksheet)
 
-	else:
-		
-		#Split line into list
-		line = line.split(",")
+	#Write to samples correct order
+	samplescorrect = open('samples_correct_order_'+worksheet+"_"+sample_type+".csv",'a') 
 
-		
-		#Get columns we need from sample sheet
-		sample_id = line["Sample_ID"]
-		worksheet = line["Sample_Plate"]
-		sample_type = line["Sample_Type"]
-		description = line["Description"]
-
-		#Append Sample ID (first element in list) to sample list
-		samplelist.write(sample_id+"\n")
-
-		# Split the decription column up, as now additional referrals section
-		desc_parts = description.split(";")
-		desc_dict = {}
-		for part in desc_parts:
-			if "=" in part:
-				key, value = part.split("=", 1)
-				desc_dict[key] = value
-
-		#Get referral from Description (tenth element in list), split by ; and get third element
-		referral = desc_dict.get("referral", "null")
-
-		#if RNA, update referral based on dictionary
-		if sample_type == "RNA" and (referral in referral_dict):
+	samplescorrect.write(sample_id+","+worksheet+","+sample_type+","+referral+"\n")
 	
-			referral = referral_dict[referral]
-		
-		#Add worksheet to set
-		if sample_type == "DNA":
-			dna.add(worksheet)
-
-		elif sample_type == "RNA":
-			rna.add(worksheet)
-
-		#Write to samples correct order
-		samplescorrect = open('samples_correct_order_'+worksheet+"_"+sample_type+".csv",'a') 
+	samplescorrect.close()
 
-		samplescorrect.write(sample_id+","+worksheet+","+sample_type+","+referral+"\n")
+	#Write any aml referral samples to additional csv
+	if referral == "aml":
+		samplesaml = open("samples_aml_to_myeloid_"+worksheet+"_"+sample_type+".csv",'a')
 		
-		samplescorrect.close()
-
-		#Write any aml referral samples to additional csv
-		if referral == "aml":
-			samplesaml = open("samples_aml_to_myeloid_"+worksheet+"_"+sample_type+".csv",'a')
-			
-			samplesaml.write(sample_id+",myeloid\n")
-
-			samplesaml.close()
-
-		# Get additional referrals to make one csv per referral
-		additional_referrals = desc_dict.get("additional_referrals", "")
-		if additional_referrals:
-			for add_ref in additional_referrals.split(","):
-				add_ref = add_ref.strip()
-				if add_ref:
-					add_ref_file = open(
-						"samples_additional_referral_"+add_ref+"_"+worksheet+"_"+sample_type+".csv", 'a'	
-					)
-					add_ref_file.write(sample_id+","+add_ref+"\n")
-					add_ref_file.close()
+		samplesaml.write(sample_id+",myeloid\n")
+
+		samplesaml.close()
+
+	# Get additional referrals to make one csv per referral
+	additional_referrals = desc_dict.get("additional_referrals", "")
+	if additional_referrals:
+		for add_ref in additional_referrals.split(","):
+			add_ref = add_ref.strip()
+			if add_ref:
+				add_ref_file = open(
+					"samples_additional_referral_"+add_ref+"_"+worksheet+"_"+sample_type+".csv", 'a'	
+				)
+				add_ref_file.write(sample_id+","+add_ref+"\n")
+				add_ref_file.close()
 
 #Write out worksheets to file
 with open('worksheets_dna.txt','w') as f:

From 56c0507f991851fad213e1dd1dd246310238570a Mon Sep 17 00:00:00 2001
From: niamh <niamh.teague@wales.nhs.uk>
Date: Mon, 2 Mar 2026 09:22:45 +0000
Subject: [PATCH 5/5] working with new column names

---
 1_TSO500.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/1_TSO500.sh b/1_TSO500.sh
index b18cff8..4d1d9bd 100755
--- a/1_TSO500.sh
+++ b/1_TSO500.sh
@@ -73,7 +73,7 @@ set -u
 cp "$raw_data"/SampleSheet.csv .
 
 # remove header from samplesheet
-sed -n -e '/Sample_ID,Sample_Name/,$p' SampleSheet.csv >> SampleSheet_updated.csv
+sed -n -e '/Sample_ID,Sample_Plate/,$p' SampleSheet.csv >> SampleSheet_updated.csv
 
 # make a list of samples and get correct order of samples for each worksheet
 python "$pipeline_scripts"/filter_sample_list.py