---
# example.yaml
# Grid Engine global parameters
grid_engine:
  cpus: 4
  memory: "16G"  # Total memory for the job
  # Kept quoted: a plain digits-and-colons scalar can be read as a
  # sexagesimal integer by YAML 1.1 parsers.
  runtime: "1:0:0"  # HH:MM:SS
  parallel_jobs: 8  # For GNU parallel processes
# Data locations
raw_data:
  root_folder: "/data/IPHS-Finer-multimorbidity/CPRD-GOLD/Data/Primary Care/GOLD"
  # Glob pattern; quoted so the `*` characters stay literal text
  pattern: "21_000345_gold_patlist*Extract*"  # Pattern to match subfolders
# Directory configurations

# Project folders for codelists and lookups
codelists_folder: "/data/IPHS-Finer-multimorbidity/CPRD-GOLD/Codelists"
lookups_folder: "/data/IPHS-Finer-multimorbidity/CPRD-GOLD/lookups"

# Paths under /data/scratch/${USER}.
# NOTE(review): YAML itself does not expand ${USER}; presumably the
# consuming code substitutes it -- confirm.
processed_data_folder: "/data/scratch/${USER}/ImportData/temp"
database: "/data/scratch/${USER}/test_new.db"
# Codelist definitions, keyed by codelist name. The name `ltcs` is the value
# used by `codelist_annotations` in the clinical table configuration.
# NOTE(review): file names are presumably resolved against codelists_folder
# -- confirm against the loader.
codelists:
  ltcs:
    original: "medcodes_terms.txt"
    user: "QMUL_CPRD_GOLD_codelists_grouped.txt"
# Table configurations
# One entry per table. Each entry gives the glob used to find its input
# files, the columns holding dates, the columns mapped through lookup files,
# and the column name -> type mapping.
tables:
  clinical:
    file_pattern: "*Clinical*.txt"
    date_columns:
      - eventdate
    # column name -> lookup file
    lookup_columns:
      constype: "SED.txt"
      episode: "EPI.txt"
    # column name -> codelist name (see the top-level `codelists` key)
    codelist_annotations:
      medcode: ltcs
    columns:
      patid: INTEGER
      eventdate: TEXT
      constype: TEXT
      consid: INTEGER
      medcode: INTEGER
      staffid: INTEGER
      episode: TEXT
      enttype: INTEGER
      adid: INTEGER
      # NOTE(review): term / ltc / count are presumably added by the
      # codelist annotation step rather than read from the raw file -- confirm.
      term: TEXT
      ltc: INTEGER
      count: INTEGER
    # Each item is a list of column names forming one index
    indexes:
      - [patid]
  patient:
    file_pattern: "*Patient*.txt"
    date_columns:
      - crd
      - tod
    lookup_columns:
      gender: "SEX.txt"
    additional_files:
      ethnicity: "21_000345_ethnicity_gold.txt"
    columns:
      patid: INTEGER
      vmid: INTEGER
      gender: TEXT
      yob: INTEGER
      mob: INTEGER
      frd: TEXT
      crd: TEXT
      regstat: INTEGER
      internal: INTEGER
      tod: TEXT
      toreason: INTEGER
      dob: TEXT
  # Disabled table, kept for reference.
  # NOTE(review): this stanza uses `codelist_columns` / `lookup_files`, while
  # the active tables use `codelist_annotations` / `lookup_columns`; check the
  # key names before re-enabling.
  # therapy:
  #   file_pattern: "*Therapy*.txt"
  #   date_columns:
  #     - eventdate
  #   codelist_columns:
  #     prodcode: "QMUL_GOLD_PRODCODES_codelists.txt"
  #   lookup_files:
  #     bnf_mapping: "GOLD_prodcodes_subparas.txt"