-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataProcessing.py
More file actions
77 lines (61 loc) · 2.2 KB
/
DataProcessing.py
File metadata and controls
77 lines (61 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import csv
# Default input paths handed to data_processing() when this file runs as a
# script; being relative, they are resolved against the working directory.
rnaseq = "HiSeqV2.txt"                  # loaded with make_table()
microarray = "HT_HG-U133A.txt"          # loaded with make_table()
clinical = "ov_tcga_clinical_data.tsv"  # tab-separated, parsed with csv.reader
def make_table(file):
    """Read a whitespace-delimited text file into a list of rows.

    Args:
        file: Path of the file to read.

    Returns:
        A list with one entry per line of the file, in file order. Each
        entry is the list of fields produced by ``str.split()`` with no
        arguments, i.e. the line split on any run of whitespace; a blank
        line therefore yields an empty list.

    NOTE(review): because fields are split on arbitrary whitespace, an empty
    cell or a cell containing a space would shift the columns of that row --
    confirm the input files never contain either.
    """
    # The context manager closes the handle even if reading raises part-way.
    with open(file, 'r') as f:
        return [line.split() for line in f]
def filter(data, func, column=2):
    """Collect one field from every data row whose index is accepted by *func*.

    Row 0 of *data* is treated as a header and is never tested.

    NOTE: the name shadows the built-in ``filter``; it is kept unchanged
    because other code in this file calls it by this name.

    Args:
        data: Sequence of rows, each an indexable sequence of fields.
        func: Predicate called with the row *index* (not the row itself).
            A row is kept when the result is truthy.
        column: Index of the field collected from each kept row. Defaults
            to 2, the position that used to be hard-coded (presumably the
            sample identifier column -- confirm against the clinical file).

    Returns:
        A list of ``data[i][column]`` for every ``i`` from 1 to
        ``len(data) - 1`` accepted by *func*, in row order.

    Raises:
        IndexError: If a kept row has no field at *column*.
    """
    return [data[i][column] for i in range(1, len(data)) if func(i)]
def make_line(data, i):
    """Format column *i* of the data rows as one space-separated text line.

    Row 0 of *data* is skipped; only rows 1 onward contribute.

    Args:
        data: Sequence of rows, each an indexable sequence of strings.
        i: Index of the column to extract from every row after the first.

    Returns:
        The values ``data[1][i], data[2][i], ...`` joined by single spaces
        and terminated by a newline. When *data* has no rows after the
        first, the result is just the newline.

    Raises:
        IndexError: If a row after the first has no field at index *i*.
    """
    # str.join builds the line in one pass instead of repeated concatenation
    # followed by trimming the trailing separator.
    return ' '.join(row[i] for row in data[1:]) + '\n'
def _write_group_table(table, out_path, group1, group2):
    """Write the group-labelled, transposed form of *table* to *out_path*.

    The first output line is ``sampleID groupID`` followed by column 0 of
    the data rows. Then, for every column ``i >= 1`` whose header cell
    (``table[0][i]``) is in *group1* or *group2*, one line is written:
    the header cell, the group number (1 or 2) and that column's values.

    Args:
        table: Rows as returned by ``make_table``; row 0 is the header.
        out_path: Path of the output file; it is created or overwritten.
        group1: Collection of sample identifiers labelled as group 1.
        group2: Collection of sample identifiers labelled as group 2.
    """
    # The context manager guarantees the output is flushed and closed, even
    # if a malformed row raises while the file is being written.
    with open(out_path, 'w') as out:
        out.write('sampleID groupID ')
        out.write(make_line(table, 0))
        sample_ids = table[0]
        for i in range(1, len(sample_ids)):
            sample = sample_ids[i]
            # The groups are tested independently, so an identifier present
            # in both collections is written once per group.
            for label, members in ((' 1 ', group1), (' 2 ', group2)):
                if sample in members:
                    out.write(sample + label)
                    out.write(make_line(table, i))


def data_processing(rnaseq, microarray, clinical):
    """Split samples into two survival groups and write labelled tables.

    The clinical file is parsed as tab-separated values. Its header row must
    contain the columns ``Overall Survival (Months)`` and
    ``Overall Survival Status``. Sample identifiers are the values that
    ``filter`` collects from the clinical rows (index 2 of each row).

    * Group 1: status is exactly ``1:DECEASED`` and survival is below 36.
    * Group 2: survival is above 36, whatever the status.

    Rows whose survival value is ``NA`` belong to neither group, and neither
    does a survival of exactly 36.

    Two files are written to the current working directory: ``SeqData.txt``
    from *rnaseq* and ``ArrayData.txt`` from *microarray*. Each has a header
    line followed by one line per grouped sample.

    Args:
        rnaseq: Path of the table used to build ``SeqData.txt``.
        microarray: Path of the table used to build ``ArrayData.txt``.
        clinical: Path of the tab-separated clinical data file.

    Returns:
        None.

    Raises:
        ValueError: If a required clinical column is missing, or a survival
            value that gets parsed is neither ``NA`` nor a number.
    """
    # newline='' is what the csv module asks for when handed a file object.
    with open(clinical, newline='') as tsv_file:
        data = list(csv.reader(tsv_file, delimiter='\t'))

    header = data[0]
    survival_time = header.index('Overall Survival (Months)')
    survival_status = header.index('Overall Survival Status')

    def _months(x):
        """Return the survival months of clinical row x, or None for 'NA'."""
        raw = data[x][survival_time]
        return None if raw == 'NA' else float(raw)

    def _deceased_early(x):
        """Return True when row x is deceased with survival under 36."""
        # The status is checked first so the survival value is parsed only
        # for deceased rows.
        if data[x][survival_status] != '1:DECEASED':
            return False
        months = _months(x)
        return months is not None and months < 36

    def _survived_long(x):
        """Return True when row x has survival over 36."""
        months = _months(x)
        return months is not None and months > 36

    # Sets make the per-sample membership test constant time instead of a
    # scan of the whole group list.
    group1 = set(filter(data, _deceased_early))
    group2 = set(filter(data, _survived_long))

    _write_group_table(make_table(rnaseq), 'SeqData.txt', group1, group2)
    _write_group_table(make_table(microarray), 'ArrayData.txt', group1, group2)
# Script entry point: run the processing on the module-level default paths.
if __name__ == "__main__":
    data_processing(rnaseq=rnaseq, microarray=microarray, clinical=clinical)