-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathharmonized_script.py
More file actions
114 lines (97 loc) · 3.63 KB
/
harmonized_script.py
File metadata and controls
114 lines (97 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import csv
import logging
import random
import os
# Values that populate_institutions picks from for an institution's
# 'curated' field.
CURATION_STATUS = ['new', 'curated']

# The input CSV files are looked up in the current working directory.
# Change paths as necessary
current_dir = os.getcwd()
hopitals_file, cities_file, names_file = (
    os.path.join(current_dir, csv_name)
    for csv_name in ('hospitals.csv', 'cities.csv', 'names.csv')
)

# Sample data read from cities.csv and names.csv at import time.
cities_list, first_names_list, last_names_list = [], [], []

# Records accumulated by the populate_* functions.
# NOTE(review): `institutions` is not referenced in the visible part of
# this file — confirm whether it is still needed.
institutions, institution_ids, physician_ids = [], [], []
def create_harmonized_institution(**institution_fields):
    """Create one harmonized institution record (placeholder).

    The stub accepts arbitrary keyword arguments because its caller,
    ``populate_institutions``, invokes it with ``institution_name``,
    ``city``, ``state``, ``zip_code`` and ``curated``. With a parameterless
    signature that call would fail with ``TypeError`` before reaching the
    ``NotImplementedError`` below.

    Args:
        **institution_fields: Field values describing the institution.

    Returns:
        Nothing yet. The caller prints the result and stores
        ``str(result)``, so an implementation presumably returns the id of
        the created record — TODO confirm.

    Raises:
        NotImplementedError: Always, until an implementation is supplied.
    """
    raise NotImplementedError("Implement")
def create_harmonized_physician(**physician_fields):
    """Create one harmonized physician record (placeholder).

    The stub accepts arbitrary keyword arguments because its caller,
    ``populate_physicians``, invokes it with ``md_firstname_har``,
    ``md_lastname_har`` and ``curated``. With a parameterless signature
    that call would fail with ``TypeError`` before reaching the
    ``NotImplementedError`` below.

    Args:
        **physician_fields: Field values describing the physician.

    Returns:
        Nothing yet. The caller prints the result and stores
        ``str(result)``, so an implementation presumably returns the id of
        the created record — TODO confirm.

    Raises:
        NotImplementedError: Always, until an implementation is supplied.
    """
    raise NotImplementedError("Implement")
def create_physician_x_institution(**link_fields):
    """Create one physician/institution link record (placeholder).

    The stub accepts arbitrary keyword arguments because its caller,
    ``populate_physician_x_institution``, invokes it with
    ``harmonized_physician``, ``harmonized_institution``, ``md_phone``,
    ``md_email``, ``md_fax`` and ``curated``. With a parameterless signature
    that call would fail with ``TypeError`` before reaching the
    ``NotImplementedError`` below.

    Args:
        **link_fields: Field values describing the link.

    Returns:
        Nothing yet. The caller only prints the result; presumably the id
        of the created record — TODO confirm.

    Raises:
        NotImplementedError: Always, until an implementation is supplied.
    """
    raise NotImplementedError("Implement")
# Fill cities_list with {'city': ..., 'state': ...} entries read from the
# cities CSV. This runs at import time.
with open(cities_file, newline='') as cities_csv:
    cities_reader = csv.reader(cities_csv, delimiter=',')
    # The first row is always discarded (presumably a header line).
    next(cities_reader, None)
    for row in cities_reader:
        if not row:
            # csv.reader yields an empty list for a blank line (e.g. a
            # trailing newline); unpacking it below would raise ValueError.
            continue
        # Rows with any other column count still raise ValueError here, so
        # malformed data is not silently accepted.
        city, state = row
        cities_list.append({'city': city, 'state': state})
# Fill first_names_list and last_names_list from the names CSV. This runs
# at import time.
# newline='' lets the csv module do its own newline handling, as its
# documentation requires for file objects, and matches how the cities file
# is opened.
with open(names_file, newline='') as names_csv:
    names_reader = csv.reader(names_csv)
    # The first row is always discarded (presumably a header line).
    next(names_reader, None)
    for row in names_reader:
        if not row:
            # csv.reader yields an empty list for a blank line (e.g. a
            # trailing newline); unpacking it below would raise ValueError.
            continue
        # Rows with any other column count still raise ValueError here, so
        # malformed data is not silently accepted.
        first_name, last_name = row
        first_names_list.append(first_name)
        last_names_list.append(last_name)
def populate_institutions():
    """Create one harmonized institution per row of the hospitals CSV.

    For every non-blank row, the first column is used as the institution
    name. A random ZIP+4 string, a random entry of ``cities_list`` and a
    random ``CURATION_STATUS`` value complete the record, which is passed to
    ``create_harmonized_institution`` as keyword arguments. The returned
    value is printed and its string form is appended to ``institution_ids``.

    NOTE(review): unlike the cities and names loaders, the first row is not
    skipped here — confirm that hospitals.csv has no header line.

    Returns:
        None.

    Raises:
        OSError: If the hospitals CSV cannot be opened.
        IndexError: If ``cities_list`` is empty (raised by
            ``random.choice``).
    """
    logging.info("Creating institutions.....")
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for file objects.
    with open(hopitals_file, newline='') as institutions_csv:
        for row in csv.reader(institutions_csv):
            if not row:
                # Blank lines come back as []; row[0] would raise IndexError.
                continue
            # randrange excludes its stop value, so 99999 and 9999 are
            # never produced.
            zip_code = f"{random.randrange(10000, 99999)}-{random.randrange(1000, 9999)}"
            # Read the fields by key rather than relying on the insertion
            # order of the dict's values.
            location = random.choice(cities_list)
            institution_dict = {
                'institution_name': row[0],
                'city': location['city'],
                'state': location['state'],
                'zip_code': zip_code,
                'curated': random.choice(CURATION_STATUS),
            }
            ids = create_harmonized_institution(**institution_dict)
            print(f"Institution id # {ids}")
            institution_ids.append(str(ids))
def _random_phone_number():
    """Return a random 'NNN-NNN-NNNN' string for fake phone/fax numbers."""
    # randrange excludes its stop value, so 999 and 9999 are never produced.
    return f"{random.randrange(100, 999)}-{random.randrange(100, 999)}-{random.randrange(1000, 9999)}"


def populate_physicians(count=10000):
    """Create ``count`` harmonized physicians with random names.

    Each physician gets a first name from ``first_names_list`` and a last
    name from ``last_names_list``, chosen independently, and is passed to
    ``create_harmonized_physician`` with ``curated`` set to ``'curated'``.
    The returned value is printed, and its string form is appended to
    ``physician_ids`` together with a random phone number, a random fax
    number and an ``first.last@gmail.com`` address built from the lowercased
    names.

    Args:
        count: Number of physicians to create. Defaults to 10000, the value
            that was previously hard-coded. Zero or a negative number
            creates nothing.

    Returns:
        None.

    Raises:
        IndexError: If ``count`` is positive and either name list is empty
            (raised by ``random.choice``).
    """
    for _ in range(count):
        first_name = random.choice(first_names_list)
        last_name = random.choice(last_names_list)
        physician_dict = {
            'md_firstname_har': first_name,
            'md_lastname_har': last_name,
            'curated': 'curated',
        }
        # Phone is drawn before fax, the same order as the inline
        # expressions this helper replaced.
        md_phone = _random_phone_number()
        md_fax = _random_phone_number()
        md_email = f"{first_name.lower()}.{last_name.lower()}@gmail.com"
        ids = create_harmonized_physician(**physician_dict)
        print(f"Physician id # {ids}")
        physician_ids.append(
            {
                'harmonized_physician': str(ids),
                'md_phone': md_phone,
                'md_email': md_email,
                'md_fax': md_fax,
            }
        )
def populate_physician_x_institution(count=20000):
    """Create ``count`` random physician/institution link records.

    Each iteration picks a random entry of ``physician_ids`` and a random
    entry of ``institution_ids``. It passes the physician's id, phone, email
    and fax, the institution id and ``curated='curated'`` to
    ``create_physician_x_institution``, then prints the returned value. The
    same physician/institution pair may be chosen more than once.

    Args:
        count: Number of link records to create. Defaults to 20000, the
            value that was previously hard-coded. Zero or a negative number
            creates nothing.

    Returns:
        None.

    Raises:
        IndexError: If ``count`` is positive and ``physician_ids`` or
            ``institution_ids`` is empty (raised by ``random.choice``).
    """
    for _ in range(count):
        # Read the fields by key rather than unpacking .values()
        # positionally, which depends on the dict's insertion order.
        physician = random.choice(physician_ids)
        harmonized_institution = random.choice(institution_ids)
        pxi_dict = {
            'harmonized_physician': physician['harmonized_physician'],
            'harmonized_institution': harmonized_institution,
            'md_phone': physician['md_phone'],
            'md_email': physician['md_email'],
            'md_fax': physician['md_fax'],
            'curated': 'curated',
        }
        ids = create_physician_x_institution(**pxi_dict)
        print(f"Physician/Institution id # {ids}")