-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcluster_log.py
More file actions
135 lines (94 loc) · 3.93 KB
/
cluster_log.py
File metadata and controls
135 lines (94 loc) · 3.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
'''Visualizing-Business-Process-Evolution
author Anton Yeshchenko
'''
import os
from pm4py.objects.log.importer.xes import factory as xes_import_factory
from pathlib import Path
from pm4py.objects.log.util import sorting
import csv
NUMBER_OF_PARTS = 4
# file = Path(r'C:\Users\anton\ownCloud\Documents-Anton\Data-C\Event-logs\Sepsis')
# file = file / "sepsis_timestamp_sorted.xes"
file = Path(r'C:\Users\anton\ownCloud\Documents-Anton\Data-C\Event-logs')
#folder = "Traffic-fines"
folder = "Italian-help-desk"
folder = "bpic2011-Hospital-log"
folder = "bpic2015"
#file = file / folder / "italian_help_desk_timestamp_sorted.xes"
#file = file / folder / "Hospital_log.xes"
file = file / folder / "BPIC15_1.xes"
def ensure_path_exists(path):
if not os.path.exists(path):
os.makedirs(path)
ensure_path_exists(file)
################################
################################
# ADDITIONAL FUNCTIONS FIRST
def intersection_date(t1start,t1end,t2start,t2end):
return (t1start <= t2start <= t1end) or (t2start <= t1start <= t2end)
################################
################################
# IMPORT FILE IN XES
parameters = {"timestamp_sort": True}
log = xes_import_factory.apply(str(file), parameters=parameters)
date1 = log[0]._list[-1]._dict["time:timestamp"]
date2 = log[-1]._list[-1]._dict["time:timestamp"]
# SORT FILES IN THE ASCENDING ORDER OF THE CYCLE TIME
sorted_log = sorting.sort_lambda(log,
lambda x: (x._list[-1]._dict["time:timestamp"] - x._list[0]._dict["time:timestamp"]) , reverse=False)
# lambda x: x.attributes["concept:name"], reverse=False)
# SPLITS LOG INTO N PARTS (SORTED RIGHT BY FIRST TIMESTAMP)
def split_into_n_parts(log, n):
cl = []
lc_bound = [0]
for i in range(1,n+1):
lc_bound.append(int(len(log._list) / n) * i)
for i in range(1,n+1):
cl_temp = log._list[lc_bound[i-1]:lc_bound[i]]
cl_temp_sorted_tm = sorted(cl_temp, key=lambda x: x._list[0]._dict['time:timestamp'], reverse=True)
cl.append(cl_temp_sorted_tm)
return cl
clusters = split_into_n_parts(sorted_log,NUMBER_OF_PARTS)
# WITHIN ONE PART
# SPLOT INTO N (50) PARTS, AND COUNT CHARACTERISTIC
# here is clustering per time range
def count_per_timerange(data, date1,date2,n = 50):
date_diff = date2 - date1
date_diff_st = date_diff / n
calculate_characteristic = [0] * (n+1)
timeranges = [date1]
for i in range(n):
timeranges.append(date1 + date_diff_st * i)
for i in range(1,len(timeranges)):
for j in data:
if intersection_date(j._list[0]._dict['time:timestamp'],j._list[-1]._dict['time:timestamp'],timeranges[i-1],timeranges[i]):
calculate_characteristic[i] += 1
return calculate_characteristic
# here is clustering per cycle time
def count_per_timerange_cycle_time(data, date1,date2,n = 50):
date_diff = date2 - date1
date_diff_st = date_diff / n
calculate_characteristic = [0] * (n+1)
timeranges = [date1]
for i in range(n):
timeranges.append(date1 + date_diff_st * i)
for i in range(1,len(timeranges)):
char_ind = 0
for j in data:
if intersection_date(j._list[0]._dict['time:timestamp'],j._list[-1]._dict['time:timestamp'],timeranges[i-1],timeranges[i]):
char_ind += 1
calculate_characteristic[i] += (j._list[-1]._dict['time:timestamp'] - j._list[0]._dict['time:timestamp']).total_seconds()/3600
if not abs (calculate_characteristic[i]) < 0.001:
calculate_characteristic[i] /= char_ind # calculate average
return calculate_characteristic
ou = []
# for i in range(NUMBER_OF_PARTS):
# ou.append(count_per_timerange(clusters[i],date1,date2))
for i in range(NUMBER_OF_PARTS):
ou.append(count_per_timerange_cycle_time(clusters[i],date1,date2))
ou = list(map(list, zip(*ou)))
# OUTPUT VALUE
with open("out_cycle.csv", "w", newline="") as f:
writer = csv.writer(f)
writer.writerows(ou)
print ("done")