diff --git a/1_TSO500.sh b/1_TSO500.sh index b18cff8..4d1d9bd 100755 --- a/1_TSO500.sh +++ b/1_TSO500.sh @@ -73,7 +73,7 @@ set -u cp "$raw_data"/SampleSheet.csv . # remove header from samplesheet -sed -n -e '/Sample_ID,Sample_Name/,$p' SampleSheet.csv >> SampleSheet_updated.csv +sed -n -e '/Sample_ID,Sample_Plate/,$p' SampleSheet.csv >> SampleSheet_updated.csv # make a list of samples and get correct order of samples for each worksheet python "$pipeline_scripts"/filter_sample_list.py diff --git a/scripts/filter_sample_list.py b/scripts/filter_sample_list.py index e4486d9..b3356ec 100644 --- a/scripts/filter_sample_list.py +++ b/scripts/filter_sample_list.py @@ -3,6 +3,7 @@ include dictionary to translate referral types """ +import csv #Open sample sheet samplesheet = open('SampleSheet_updated.csv','r') @@ -28,62 +29,70 @@ dna = set() rna = set() +#Get column by header name instead of header position. +read_samplesheet = csv.DictReader(samplesheet) + #Go through samplesheet until you hit the header lines -for line in samplesheet: +for line in read_samplesheet: - #Remove new line character - line = line.strip() - - #Skip if header line - if line.startswith('Sample'): - - next - - else: - - #Split line into list - line = line.split(",") - - #Get columns we need from sample sheet - sample_id = line[0] - worksheet = line[2] - sample_type = line[7] - description = line[9] + #Get columns we need from sample sheet + sample_id = line["Sample_ID"] + worksheet = line["Sample_Plate"] + sample_type = line["Sample_Type"] + description = line["Description"] + + #Append Sample ID (first element in list) to sample list + samplelist.write(sample_id+"\n") + + # Split the description column up, as it now has an additional referrals section + desc_parts = description.split(";") + desc_dict = {} + for part in desc_parts: + if "=" in part: + key, value = part.split("=", 1) + desc_dict[key] = value + + #Get referral from the key=value pairs parsed out of Description, defaulting to "null" + 
referral = desc_dict.get("referral", "null") + + #if RNA, update referral based on dictionary + if sample_type == "RNA" and (referral in referral_dict): + + referral = referral_dict[referral] + + #Add worksheet to set + if sample_type == "DNA": + dna.add(worksheet) - #Append Sample ID (first element in list) to sample list - samplelist.write(sample_id+"\n") + elif sample_type == "RNA": + rna.add(worksheet) - #Get referral from Description (tenth element in list), split by ; and get third element - referral = description.split(";")[2] - referral = referral.split("=")[1] + #Write to samples correct order + samplescorrect = open('samples_correct_order_'+worksheet+"_"+sample_type+".csv",'a') - #if RNA, update referral based on dictionary - if sample_type == "RNA" and (referral in referral_dict): + samplescorrect.write(sample_id+","+worksheet+","+sample_type+","+referral+"\n") - referral = referral_dict[referral] - - #Add worksheet to set - if sample_type == "DNA": - dna.add(worksheet) - - elif sample_type == "RNA": - rna.add(worksheet) + samplescorrect.close() - #Write to samples correct order - samplescorrect = open('samples_correct_order_'+worksheet+"_"+sample_type+".csv",'a') - - samplescorrect.write(sample_id+","+worksheet+","+sample_type+","+referral+"\n") + #Write any aml referral samples to additional csv + if referral == "aml": + samplesaml = open("samples_aml_to_myeloid_"+worksheet+"_"+sample_type+".csv",'a') - samplescorrect.close() - - #Write any aml referral samples to additional csv - if referral == "aml": - samplesaml = open("samples_aml_to_myeloid_"+worksheet+"_"+sample_type+".csv",'a') - - samplesaml.write(sample_id+",myeloid\n") - - samplesaml.close() - + samplesaml.write(sample_id+",myeloid\n") + + samplesaml.close() + + # Get additional referrals to make one csv per referral + additional_referrals = desc_dict.get("additional_referrals", "") + if additional_referrals: + for add_ref in additional_referrals.split(","): + add_ref = add_ref.strip() + if 
add_ref: + add_ref_file = open( + "samples_additional_referral_"+add_ref+"_"+worksheet+"_"+sample_type+".csv", 'a' + ) + add_ref_file.write(sample_id+","+add_ref+"\n") + add_ref_file.close() #Write out worksheets to file with open('worksheets_dna.txt','w') as f: