-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_tools.py
More file actions
172 lines (123 loc) · 5.14 KB
/
data_tools.py
File metadata and controls
172 lines (123 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
from __init__ import *
# Base directory that set_data_path() prepends to a data file name.
dir_path = "../data/"

# Column headers applied, in this order, to the DataFrame assembled by
# get_truth_data(). The order must match the order of the per-file record
# built there, because the headers are assigned positionally.
frame_strings = [
    "File_name",
    "signal",
    "trap",
    "B_bkg [Tesla]",
    "Cyclotron frequency [Hertz]",
    "Downmixed cyclotron frequency [Hertz]",
    "Energy [eV]",
    "LO frequency [Hertz]",
    "Pitch angle [degrees]",
    "Starting x position [metres]",
    "Starting y position [metres]",
    "Starting z position [metres]",
    "Starting velocity [metres/second]",
    "Time step [seconds]",
    "Waveguide impedance [Ohms]",
    "i_coil [Amps]",
    "r_coil [metres]",
    "r_wg [metres]",
]
def set_data_path(file):
    """Return the location of ``file`` inside the module's data directory.

    The result is the plain string concatenation of the module-level
    ``dir_path`` and ``file``; no path separator is inserted.

    Args:
        file (str): File name relative to ``dir_path``.

    Returns:
        str: ``dir_path`` followed by ``file``.
    """
    resolved = dir_path + file
    return resolved
def get_signal(file, signal_string, full_path=False, override_path=False):
    """Read one dataset from the ``Data`` group of an h5 file.

    Args:
        file (str): File name, or a complete path when ``full_path`` is true.
        signal_string (str): Key of the dataset inside the ``Data`` group.
        full_path (bool, optional): Use ``file`` as given instead of
            resolving it through ``set_data_path``. Defaults to False.
        override_path (str, optional): When truthy, the path becomes
            ``override_path + file`` and takes precedence over ``full_path``.
            Defaults to False.

    Returns:
        The full contents of the dataset, as returned by slicing it with
        ``[:]``.

    Raises:
        ValueError: If the resolved path does not end with ``.h5``.
        FileNotFoundError: If the resolved path does not exist.
    """
    target = file if full_path else set_data_path(file)
    if override_path:
        # An explicit override wins over both other resolution modes.
        target = override_path + file

    is_h5 = target.endswith('.h5')
    if not is_h5:
        raise ValueError("File must be an h5 file")
    if not os.path.exists(target):
        raise FileNotFoundError("File does not exist")

    with h5py.File(target, 'r') as handle:
        dataset = handle['Data'][signal_string]
        # Slice while the file is still open so the data is copied out.
        return dataset[:]
def get_attributes(file, verbose=False, full_path=False):
    """Read the attributes attached to every signal in an h5 file.

    Args:
        file (str): File name, or a complete path when ``full_path`` is true.
        verbose (bool, optional): Print each signal key followed by its
            attribute names and values. Defaults to False.
        full_path (bool, optional): Use ``file`` as given instead of
            resolving it through ``set_data_path``. Defaults to False.

    Returns:
        list: Keys of the ``Data`` group (one per signal).
        list: Attribute names of the last signal visited; empty when the
            ``Data`` group holds no signals.
        list: One list of attribute values per signal, in the same order as
            the signal keys.

    Raises:
        ValueError: If the resolved path does not end with ``.h5``.
        FileNotFoundError: If the resolved path does not exist.
    """
    # Honour full_path: callers that already hold a complete path must not
    # get the data directory prepended a second time.
    data_path = file if full_path else set_data_path(file)

    # Error handling
    if not data_path.endswith('.h5'):
        raise ValueError("File must be an h5 file")
    if not os.path.exists(data_path):
        raise FileNotFoundError("File does not exist")

    # Initialised up front so a file without signals returns empty lists
    # instead of failing on an unbound name.
    attributes = []
    attrs_container = []
    with h5py.File(data_path, 'r') as f:
        data_group = f['Data']
        signal_strings = list(data_group.keys())
        for signal_string in signal_strings:
            signal_attrs = data_group[signal_string].attrs
            attributes = list(signal_attrs.keys())
            attrs_container.append(list(signal_attrs.values()))
            if verbose:
                print('Signal String: ', signal_string)
                for attribute in attributes:
                    print(attribute, ': ', signal_attrs[attribute])
    return signal_strings, attributes, attrs_container
def get_truth_data(f_list, verbose=False, full_path=True, savefile=False):
    """Collect the truth attributes of several h5 files into one DataFrame.

    This is only applicable to some .h5 files as they don't always have the
    same file structure: the attributes of the first signal in each file are
    read by position, so the file must store them in the order the record
    below expects (NOTE(review): confirm against the files being read).

    Args:
        f_list (list): Paths to the h5 files. The trap type is taken from the
            third-from-last ``/``-separated component of each path, so a path
            with fewer than three components raises ``IndexError``.
        verbose (bool, optional): Print a progress line every 100 files.
            Defaults to False.
        full_path (bool, optional): Treat the entries of ``f_list`` as
            complete paths. When False they are resolved through
            ``set_data_path``. Defaults to True.
        savefile (str, optional): Destination of a .csv copy of the result;
            nothing is written when falsy. Defaults to False.

    Returns:
        pandas.DataFrame: One row per file, with ``frame_strings`` as column
        headers. An empty ``f_list`` yields an empty frame with those
        headers.

    Raises:
        ValueError: If ``f_list`` is not a list, a flag is not a boolean, or
            an entry does not end with ``.h5``.
        FileNotFoundError: If any resolved path does not exist.
    """
    #! TODO Improve this so that it isn't rigid, have it read out the Keys for the truth info and construct dataframe/csv from those keys.
    # Error handling: argument types are checked before the filesystem is
    # touched, because full_path decides how the paths are resolved.
    if not isinstance(f_list, list):
        raise ValueError("f_list must be a list of file paths")
    if not isinstance(verbose, bool):
        raise ValueError("Verbose must be a boolean")
    if not isinstance(full_path, bool):
        raise ValueError("full_path must be a boolean")
    if not all(f.endswith('.h5') for f in f_list):
        raise ValueError("All files must be h5 files")
    resolved_paths = f_list if full_path else [set_data_path(f) for f in f_list]
    if not all(os.path.exists(path) for path in resolved_paths):
        raise FileNotFoundError("All files must exist")

    frames = []
    for i, f in enumerate(f_list):
        signal_strings, _, data = get_attributes(f, full_path=full_path)
        truth = data[0]  # attribute values of the first signal only
        path_parts = f.split('/')
        start_position = truth[6]
        record = {
            'File_name': path_parts[-1],
            'signal': signal_strings[0],
            'trap': path_parts[-3],
            'B_bkg': truth[0],
            'f_cyc': truth[1],
            'f_cyc_d': truth[2],
            'energy': truth[3],
            'f_lo': truth[4],
            'pitch_angle': truth[5],
            'r_x': float(start_position[0]),
            'r_y': float(start_position[1]),
            'r_z': float(start_position[2]),
            # Wrapped in a list so pandas builds a one-row frame from the
            # otherwise scalar values.
            'v_0': [truth[7]],
            'dt': truth[8],
            'Z_wg': truth[9],
            'i_coil': truth[10],
            'r_coil': truth[11],
            'r_wg': truth[12]
        }
        frames.append(pd.DataFrame(record))
        if verbose and i % 100 == 0:
            print(str(i) + ' files processed')

    if frames:
        # Concatenate once instead of growing the frame inside the loop.
        df = pd.concat(frames, axis=0)
        df.columns = frame_strings
    else:
        # No input files: return an empty table that still has the headers.
        df = pd.DataFrame(columns=frame_strings)

    if savefile:
        df.to_csv(savefile, index=False)
    return df