-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
149 lines (117 loc) · 6.13 KB
/
main.py
File metadata and controls
149 lines (117 loc) · 6.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
----------------------------------------------------------------------------------------------------
main.py
-- parse data from runInfo.xml and add to runinfo dictionary
-- parse data from samplesheet.csv and add to samplesheet dictionary
-- parse data from interops files and add to interop dictionary
-- add data from dictionaries above to runlog table in RunlogDB
-- determine if run was from hiseq, miseq or nextseq
-- add data from RunParameters.xml file to the matching hiseq, miseq or nextseq table in RunlogDB
----------------------------------------------------------------------------------------------------
"""
# Import scripts
from scripts import add_to_db, parse_interop, parse_runinfo, parse_runparameters, parse_samplesheet
import sys
# Load run folder
# The run folder path is taken from the first command-line argument. When it is
# missing, exit with a usage message (written to stderr, exit status 1) instead
# of failing with an IndexError traceback.
if len(sys.argv) < 2:
    sys.exit("usage: python main.py <run_folder>")
run_folder = sys.argv[1]
# ----------------------------------------------------------------------------------------------------
# PARSE RUNINFO
# Dictionary handed to the parse_runinfo helpers, which receive it together with the run folder.
runinfo_dict = {}
# Each entry is [name, mode]; both are forwarded unchanged to parse_runinfo.parse1.
# NOTE(review): "get"/"find" presumably select how the value is read from the XML -- confirm
# against parse_runinfo.
runinfo_values = [
    ["Id", "get"],
    ["Instrument", "find"],
    ["Date", "find"],
]
# one parse1 call per entry of runinfo_values
for field_name, access_mode in runinfo_values:
    parse_runinfo.parse1(run_folder, runinfo_dict, field_name, access_mode)
# number of cycles
parse_runinfo.parse2(run_folder, runinfo_dict)
# ----------------------------------------------------------------------------------------------------
# PARSE SAMPLESHEET
# Dictionary handed to the parse_samplesheet helpers below.
samplesheet_dict = {}
# Single names, each forwarded on its own to parse_samplesheet.parse1.
samplesheet_values1 = [
    "Investigator Name",
    "Experiment Name",
    "Date",
    "Workflow",
    "Application",
    "Assay",
    "Description",
    "Chemistry",
]
# Pairs forwarded as two separate arguments to parse_samplesheet.parse2.
# NOTE(review): the first element looks like the key to store under and the second like the
# samplesheet column to read -- confirm against parse_samplesheet.
samplesheet_values2 = [
    ["Plates", "Sample_Plate"],
    ["Description2", "Description"],
    ["Samples", "Sample_ID"],
    ["I7", "I7_Index_ID"],
    ["I5", "I5_Index_ID"],
]
# data from samplesheet_values1
for header_name in samplesheet_values1:
    parse_samplesheet.parse1(run_folder, samplesheet_dict, header_name)
# plates, description, samples and indexes
for first_name, second_name in samplesheet_values2:
    parse_samplesheet.parse2(run_folder, samplesheet_dict, first_name, second_name)
# pipeline (parse3 receives only the dictionary, not the run folder)
parse_samplesheet.parse3(samplesheet_dict)
# ----------------------------------------------------------------------------------------------------
# PARSE INTEROPS
# Start from an empty dictionary; the parameters to extract are set inside parse_interop.parse.
interop_dict = dict()
parse_interop.parse(run_folder, interop_dict)
# ----------------------------------------------------------------------------------------------------
# ADD TO DATABASE
# Hand the three dictionaries built above (runinfo, samplesheet, interop) to add_to_db in one call.
add_to_db.runinfo_add(runinfo_dict, samplesheet_dict, interop_dict)
# ----------------------------------------------------------------------------------------------------
# PARSE RUNPARAMETERS
# Ask for the instrument type once and reuse the answer, instead of calling
# instrument_type() again for every candidate instrument. The three cases are mutually
# exclusive, so they form a single if/elif chain; when none of them matches, nothing
# further is parsed or added to the database.
instrument = parse_runparameters.instrument_type(run_folder)

# hiseq
if instrument == "HiSeq":
    # dictionary to fill and the variables to be extracted
    hiseq_dict = {}
    # single names, each forwarded on its own to hiseq1
    runparameter_values1 = [
        "RunID", "WorkFlowType", "PairEndFC", "Flowcell", "Sbs", "Pe", "Index",
        "ClusteringChoice", "RapidRunChemistry", "RunMode", "ApplicationName", "ApplicationVersion",
        "FPGAVersion", "CPLDVersion", "RTAVersion", "ChemistryVersion", "CameraFirmware", "CameraDriver",
    ]
    # pairs, forwarded as two separate arguments to hiseq2
    runparameter_values2 = [
        ["Sbs", "SbsReagentKit"],
        ["Index", "ReagentKit"],
    ]
    # extract variables
    for item in runparameter_values1:
        parse_runparameters.hiseq1(run_folder, hiseq_dict, item)
    for item in runparameter_values2:
        parse_runparameters.hiseq2(run_folder, hiseq_dict, item[0], item[1])
    # upload data to database
    add_to_db.hiseq_add(hiseq_dict)
# ----------------------------------------------------------------------------------------------------
# miseq
elif instrument == "MiSeq":
    # dictionary to fill and the variables to be extracted
    miseq_dict = {}
    # single names, each forwarded on its own to miseq1
    runparameter_values1 = ["RunID", "MCSVersion", "RTAVersion"]
    # pairs, forwarded as two separate arguments to miseq2
    runparameter_values2 = [
        ["FlowcellRFIDTag", "SerialNumber"],
        ["FlowcellRFIDTag", "PartNumber"],
        ["FlowcellRFIDTag", "ExpirationDate"],
        ["PR2BottleRFIDTag", "SerialNumber"],
        ["PR2BottleRFIDTag", "PartNumber"],
        ["PR2BottleRFIDTag", "ExpirationDate"],
        ["ReagentKitRFIDTag", "SerialNumber"],
        ["ReagentKitRFIDTag", "PartNumber"],
        ["ReagentKitRFIDTag", "ExpirationDate"],
    ]
    # extract variables
    for item in runparameter_values1:
        parse_runparameters.miseq1(run_folder, miseq_dict, item)
    for item in runparameter_values2:
        parse_runparameters.miseq2(run_folder, miseq_dict, item[0], item[1])
    # upload data to database
    add_to_db.miseq_add(miseq_dict)
# ----------------------------------------------------------------------------------------------------
# nextseq
elif instrument == "NextSeq":
    # dictionary to fill and the variables to be extracted
    nextseq_dict = {}
    # single names, each forwarded on its own to nextseq1
    runparameter_values1 = [
        "RunID", "InstrumentID", "RTAVersion", "SystemSuiteVersion", "FlowCellSerial",
        "PR2BottleSerial", "ReagentKitSerial", "ExperimentName", "LibraryID", "Chemistry", "FocusMethod",
        "SurfaceToScan", "IsPairedEnd", "CustomReadOnePrimer", "CustomReadTwoPrimer", "CustomIndexPrimer",
        "CustomIndexTwoPrimer", "UsesCustomReadOnePrimer", "UsesCustomReadTwoPrimer",
        "UsesCustomIndexPrimer", "UsesCustomIndexTwoPrimer", "RunManagementType", "BaseSpaceRunId",
        "BaseSpaceRunMode", "ComputerName", "MaxCyclesSupportedByReagentKit",
    ]
    # pairs, forwarded as two separate arguments to nextseq2
    runparameter_values2 = [
        ["Setup", "ApplicationVersion"],
        ["Setup", "ApplicationName"],
    ]
    # extract variables
    for item in runparameter_values1:
        parse_runparameters.nextseq1(run_folder, nextseq_dict, item)
    for item in runparameter_values2:
        parse_runparameters.nextseq2(run_folder, nextseq_dict, item[0], item[1])
    # upload data to database
    add_to_db.nextseq_add(nextseq_dict)