Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 1_TSO500.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ set -u
cp "$raw_data"/SampleSheet.csv .

# remove header from samplesheet
sed -n -e '/Sample_ID,Sample_Name/,$p' SampleSheet.csv >> SampleSheet_updated.csv
sed -n -e '/Sample_ID,Sample_Plate/,$p' SampleSheet.csv >> SampleSheet_updated.csv

# make a list of samples and get correct order of samples for each worksheet
python "$pipeline_scripts"/filter_sample_list.py
Expand Down
105 changes: 57 additions & 48 deletions scripts/filter_sample_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
include dictionary to translate referral types

"""
import csv

#Open sample sheet
samplesheet = open('SampleSheet_updated.csv','r')
Expand All @@ -28,62 +29,70 @@
dna = set()
rna = set()

#Get column by header name instead of header position.
read_samplesheet = csv.DictReader(samplesheet)

#Go through samplesheet until you hit the header lines
for line in samplesheet:
for line in read_samplesheet:

#Remove new line character
line = line.strip()

#Skip if header line
if line.startswith('Sample'):

next

else:

#Split line into list
line = line.split(",")

#Get columns we need from sample sheet
sample_id = line[0]
worksheet = line[2]
sample_type = line[7]
description = line[9]
#Get columns we need from sample sheet
sample_id = line["Sample_ID"]
worksheet = line["Sample_Plate"]
sample_type = line["Sample_Type"]
description = line["Description"]

#Append Sample ID (the Sample_ID column) to sample list
samplelist.write(sample_id+"\n")

# Split the Description column into its ";"-separated key=value parts, as it now has an additional referrals section
desc_parts = description.split(";")
desc_dict = {}
for part in desc_parts:
if "=" in part:
key, value = part.split("=", 1)
desc_dict[key] = value

#Get referral from the parsed Description fields, falling back to "null" if there is no referral entry
referral = desc_dict.get("referral", "null")

#if RNA, update referral based on dictionary
if sample_type == "RNA" and (referral in referral_dict):

referral = referral_dict[referral]

#Add worksheet to set
if sample_type == "DNA":
dna.add(worksheet)

#Append Sample ID (first element in list) to sample list
samplelist.write(sample_id+"\n")
elif sample_type == "RNA":
rna.add(worksheet)

#Get referral from Description (tenth element in list), split by ; and get third element
referral = description.split(";")[2]
referral = referral.split("=")[1]
#Write to samples correct order
samplescorrect = open('samples_correct_order_'+worksheet+"_"+sample_type+".csv",'a')

#if RNA, update referral based on dictionary
if sample_type == "RNA" and (referral in referral_dict):
samplescorrect.write(sample_id+","+worksheet+","+sample_type+","+referral+"\n")

referral = referral_dict[referral]

#Add worksheet to set
if sample_type == "DNA":
dna.add(worksheet)

elif sample_type == "RNA":
rna.add(worksheet)
samplescorrect.close()

#Write to samples correct order
samplescorrect = open('samples_correct_order_'+worksheet+"_"+sample_type+".csv",'a')

samplescorrect.write(sample_id+","+worksheet+","+sample_type+","+referral+"\n")
#Write any aml referral samples to additional csv
if referral == "aml":
samplesaml = open("samples_aml_to_myeloid_"+worksheet+"_"+sample_type+".csv",'a')

samplescorrect.close()

#Write any aml referral samples to additional csv
if referral == "aml":
samplesaml = open("samples_aml_to_myeloid_"+worksheet+"_"+sample_type+".csv",'a')

samplesaml.write(sample_id+",myeloid\n")

samplesaml.close()

samplesaml.write(sample_id+",myeloid\n")

samplesaml.close()

# Get additional referrals to make one csv per referral
additional_referrals = desc_dict.get("additional_referrals", "")
if additional_referrals:
for add_ref in additional_referrals.split(","):
add_ref = add_ref.strip()
if add_ref:
add_ref_file = open(
"samples_additional_referral_"+add_ref+"_"+worksheet+"_"+sample_type+".csv", 'a'
)
add_ref_file.write(sample_id+","+add_ref+"\n")
add_ref_file.close()

#Write out worksheets to file
with open('worksheets_dna.txt','w') as f:
Expand Down