-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsplit_and_cut.py
More file actions
62 lines (53 loc) · 2.04 KB
/
split_and_cut.py
File metadata and controls
62 lines (53 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import csv
import math
import pandas as pd
import argparse
def select_rows(frame, key_index, order_index, min_cut, max_cut, max_leap=100000):
    """Group the rows of *frame* per sender, keeping only acceptable rows.

    A row is kept for its sender when all of the following hold:

    * its ``order_index`` value is strictly greater than the last value kept
      for that sender (the high-water mark, which starts at 0);
    * the jump from that high-water mark is smaller than ``max_leap``;
    * the value lies in the closed interval ``[min_cut, max_cut]``.

    Args:
        frame: pandas DataFrame holding at least the ``key_index`` and
            ``order_index`` columns.
        key_index: name of the column identifying the sender.
        order_index: name of the column holding the sequence number.
        min_cut: smallest sequence number to keep (inclusive).
        max_cut: largest sequence number to keep (inclusive).
        max_leap: a row is rejected when its sequence number is this far
            (or further) above the sender's high-water mark.

    Returns:
        A dict keyed by sender, in first-seen order. Each value is a dict with
        ``"rows"`` (the kept rows as pandas Series, in input order) and
        ``"max_row"`` (the sequence number of the last kept row, 0 if none).
        A sender whose rows were all rejected still gets an entry.
    """
    selected = {}
    for _, row in frame.iterrows():
        entry = selected.setdefault(row[key_index], {"rows": [], "max_row": 0})
        high_water = entry["max_row"]
        order_number = row[order_index]
        # NOTE(review): the high-water mark only advances for rows that fall
        # inside the cut window - confirm this is the intended behaviour.
        if (high_water < order_number
                and abs(order_number - high_water) < max_leap
                and min_cut <= order_number <= max_cut):
            entry["rows"].append(row)
            entry["max_row"] = order_number
    return selected


def write_rows(writer, rows, order_index, cutoff):
    """Write every row whose ``order_index`` value is <= *cutoff* to *writer*.

    Args:
        writer: object with a ``writerow`` method (e.g. a ``csv.writer``).
        rows: iterable of rows supporting ``row[order_index]``.
        order_index: name of the column holding the sequence number.
        cutoff: largest sequence number that is still written.
    """
    for row in rows:
        if row[order_index] <= cutoff:
            writer.writerow(row)


if __name__ == '__main__':
    key_index = "sender"
    order_index = "sequence"
    max_cut = 66000  # math.inf
    min_cut = 47008  # 0
    header = ["node", "sequence", "rssi"]

    parser = argparse.ArgumentParser(
        description='Split and cut csv capture file ')
    parser.add_argument('--file', nargs='?', help='data filename', default='out.csv')
    parser.add_argument('--split', nargs='?', help='split into multiple files', default='True')
    args = parser.parse_args()
    # Only the exact string "True" enables one-file-per-sender output.
    split = args.split == "True"

    selected = select_rows(pd.read_csv(args.file), key_index, order_index,
                           min_cut, max_cut)

    # Smallest per-sender high-water mark, computed once instead of per row.
    # Rows above it are dropped so every sender's output ends at the same
    # sequence number. Unused when there are no senders at all.
    cutoff = min((entry["max_row"] for entry in selected.values()), default=None)

    if split:
        # One file per sender; each is closed before the next one is opened.
        for sender, entry in selected.items():
            # f-string so that non-string sender ids also yield a filename.
            with open(f"{sender}.csv", 'w', newline='') as handle:
                writer = csv.writer(handle)
                writer.writerow(header)
                write_rows(writer, entry["rows"], order_index, cutoff)
    else:
        # Single combined file; the header is written once, and only when
        # at least one sender was seen.
        with open("out_filtered.csv", 'w', newline='') as handle:
            writer = csv.writer(handle)
            if selected:
                writer.writerow(header)
            for entry in selected.values():
                write_rows(writer, entry["rows"], order_index, cutoff)