-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
81 lines (69 loc) · 2.69 KB
/
parser.py
File metadata and controls
81 lines (69 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import csv
import json
import numpy as np
import pandas as pd
# Lookup table: type tag as written in the data file -> dtype name used
# when the columns are loaded.
typeDictionary = dict(
    BIT='bool',
    REAL32='float32',
    REAL64='float64',
    INT16='int16',
)
"""
Function to read all information from the text file
Parameters:
none required; the path of the text file is prompted for with input()
Returns:
data : dataframe with required data
properties : file properties for JSON
"""
def read_file(filepath=None):
    """Read the data table and the file properties from one text file.

    Parameters:
        filepath (str, optional): path of the text file. When it is omitted
            (or None) the path is requested interactively with input(), so
            existing zero-argument calls keep working.
    Returns:
        data : dataframe returned by read_data
        properties : dict returned by read_file_properties (used for the JSON)
    """
    if filepath is None:
        filepath = input("Enter file path: ")
    data = read_data(filepath)
    properties = read_file_properties(filepath)
    return data, properties
"""Reads the file properties for JSON """
def read_file_properties(filename):
    """Read the file properties for the JSON from the head of the text file.

    The file is parsed as tab-separated text and only its first four lines
    are consulted:
        line 1, field 2        -> 'file name'
        line 3, fields 3 and 4 -> 'start date', 'start time'
        line 4, fields 3 and 4 -> 'end date', 'end time'

    Parameters:
        filename (str): path of the text file
    Returns:
        properties : dict with the keys listed above; keys whose line is
            missing from a shorter file are simply absent
    Raises:
        IndexError: if one of those lines has fewer fields than expected
    """
    properties = {}
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for files handed to csv.reader.
    with open(filename, newline='') as file:
        reader = csv.reader(file, delimiter='\t')
        for line_number, row in enumerate(reader):
            if line_number == 0:
                properties['file name'] = row[1]
            elif line_number == 2:
                properties['start date'] = row[2]
                properties['start time'] = row[3]
            elif line_number == 3:
                properties['end date'] = row[2]
                properties['end time'] = row[3]
                break  # nothing after the fourth line is needed
    return properties
"""Reads data from text file with correct datatype"""
def read_data(filename):
    """Read the data table from the text file with the declared column types.

    Row positions (0-based) are fixed by this function: row 6 is used as the
    header, row 21 supplies the type tag of each column, the values start at
    row 28 and the last line of the file is dropped.

    Parameters:
        filename (str): path of the text file
    Returns:
        data : dataframe holding the first column, renamed from 'Name' to
            'Time', followed by every second column starting at the second
    Raises:
        KeyError: if a type tag in the file is not a key of typeDictionary
    """
    whitespace = r'\s+'  # replaces the deprecated delim_whitespace=True

    # First pass: keep only the header (row 6) and the type row (row 21).
    skiprows = list(range(0, 6)) + list(range(7, 21))
    type_row = pd.read_csv(filename, sep=whitespace, skiprows=skiprows,
                           nrows=1, dtype='string')
    # Only the odd positions are kept; the even ones are the extra 'Name'
    # columns, which carry no type tag that is needed here.
    type_tags = type_row.iloc[0, 1::2]
    datatypes = {column: typeDictionary[tag]
                 for column, tag in type_tags.items()}

    # Second pass: read the values from the same file that was passed in
    # (previously a hard-coded 'test_dataset_1.txt' was read here).
    skiprows = list(range(0, 6)) + list(range(7, 28))
    data = pd.read_csv(filename, sep=whitespace, skiprows=skiprows,
                       skipfooter=1, dtype=datatypes,
                       engine='python')  # skipfooter needs the python engine

    # Keep the first (time) column plus the odd-positioned value columns,
    # dropping the repeated time columns in between.
    keep = [0] + list(range(1, data.shape[1], 2))
    data = data.iloc[:, keep]
    return data.rename(columns={'Name': 'Time'})
"""Creates JSON"""
def create_JSON(data, properties):
    """Create the JSON text from the file properties and the column means.

    Parameters:
        data : dataframe whose first column is the time column
        properties (dict): file properties; a 'mean' entry mapping column
            name to mean value is added to this dict in place
    Returns:
        json_data (str): properties (including 'mean') serialised with an
            indent of 4
    """
    # Skip only the first (time) column. The previous slice 1:-1 also left
    # out the last value column, contradicting the stated intent.
    properties['mean'] = data.iloc[:, 1:].mean(axis=0).to_dict()
    return json.dumps(properties, indent=4)
"""
returns:
data : pandas dataframe
json_data : json with file and mean information
"""
def main():
    """Read the file via read_file and build the JSON via create_JSON.

    returns:
        data : pandas dataframe
        json_data : json with file and mean information
    """
    frame, file_properties = read_file()
    summary = create_JSON(frame, file_properties)
    return frame, summary
if __name__ == "__main__":
    # Run as a script: execute main(); its return value is not used here.
    main()