-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutilities_module.py
More file actions
115 lines (81 loc) · 3.77 KB
/
utilities_module.py
File metadata and controls
115 lines (81 loc) · 3.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import pandas as pd
from pm4py.objects.log.importer.xes import factory as xes_import_factory
from pm4py.statistics.traces.log.case_statistics import get_variant_statistics
from pm4py.objects.conversion.log.versions.to_dataframe import get_dataframe_from_event_stream
def read_event_log(dir):
    """Load an event log from a CSV file.

    The ``time:timestamp`` column is parsed into timezone-aware (UTC)
    datetimes; every other column is returned as ``pandas.read_csv``
    produced it.

    Args:
        dir: Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns:
        The event log as a DataFrame.
    """
    # TODO: enable CSV and XES files.
    timestamp_col = 'time:timestamp'
    frame = pd.read_csv(dir)
    parsed_timestamps = pd.to_datetime(frame[timestamp_col], utc=True)
    frame[timestamp_col] = parsed_timestamps
    return frame
def read_event_log_xes(dir):
    """Load an event log from an XES file and return it as a DataFrame.

    The file is imported with the pm4py XES importer and the imported log
    is then converted with ``get_dataframe_from_event_stream``.

    Args:
        dir: Location of the XES file, passed to the pm4py importer.

    Returns:
        Whatever ``get_dataframe_from_event_stream`` produces for the
        imported log.
    """
    imported_log = xes_import_factory.apply(dir)
    return get_dataframe_from_event_stream(imported_log)
def get_dfg_time(data):
    """Return the elapsed time of every directly-follows pair in the log.

    Each row is paired with the row immediately after it; the pair is kept
    only when both rows carry the same ``case:concept:name``. The function
    does not sort, so the caller is expected to pass a log in which the
    events of a case are consecutive and in chronological order.

    The last row has no successor and therefore never produces a pair (it is
    deliberately NOT wrapped around to the first row, which would create a
    bogus edge with a negative duration whenever the first and last rows
    belong to the same case).

    Args:
        data: Event log DataFrame with at least the columns
            ``case:concept:name``, ``concept:name`` and ``time:timestamp``.
            It is not modified.

    Returns:
        A DataFrame indexed by
        ``(case:concept:name, concept:name, concept:name_2)`` -- case,
        activity, following activity -- with a single float column
        ``difference`` holding the elapsed time in whole minutes
        (rounded down).
    """
    case_col = 'case:concept:name'
    activity_col = 'concept:name'
    time_col = 'time:timestamp'
    next_activity_col = activity_col + '_2'

    # Work on a positional (0..n-1) copy so the input stays untouched and
    # rows can be aligned with their successor regardless of the input index.
    events = data[[case_col, activity_col, time_col]].reset_index(drop=True)
    events[time_col] = pd.to_datetime(events[time_col], utc=True)

    # Row i of `following` is row i+1 of `events`; the last row is all-missing.
    following = events.shift(-1)
    same_case = (events[case_col] == following[case_col]).fillna(False).astype(bool)

    # total_seconds() // 60 gives whole minutes as float and, unlike
    # astype('timedelta64[m]'), works on both pandas 1.x and 2.x.
    elapsed = following[time_col] - events[time_col]
    pairs = pd.DataFrame({
        case_col: events[case_col],
        activity_col: events[activity_col],
        next_activity_col: following[activity_col],
        'difference': elapsed.dt.total_seconds() // 60,
    })[same_case]

    return pairs.set_index([case_col, activity_col, next_activity_col])
def get_relative_time(data):
    """Return the event log with a relative time for every event.

    Each row is compared with the row immediately before it. The function
    does not sort, so the caller is expected to pass a log in which the
    events of a case are consecutive and in chronological order.

    * A row whose case differs from the previous row's case (and the very
      first row) is a start event: ``start_event`` is True and
      ``relative_time`` is the number of whole days between the event and
      the earliest timestamp in the log, divided by 7.
    * Any other row has ``start_event`` False and ``relative_time`` equal
      to the whole minutes (rounded down) elapsed since the previous row.

    Args:
        data: Event log DataFrame with at least the columns
            ``case:concept:name``, ``concept:name`` and ``time:timestamp``.
            It is not modified; its index may be of any kind.

    Returns:
        A DataFrame with a fresh 0..n-1 index and the columns
        ``case:concept:name``, ``concept:name``, ``time:timestamp``
        (UTC datetimes), ``relative_time`` (float) and ``start_event``
        (bool), one row per input row in the input order.
    """
    case_col = 'case:concept:name'
    activity_col = 'concept:name'
    time_col = 'time:timestamp'

    # Positional copy: keeps the caller's frame intact and makes the
    # row-to-previous-row alignment independent of the input index.
    events = data[[case_col, activity_col, time_col]].reset_index(drop=True)
    events[time_col] = pd.to_datetime(events[time_col], utc=True)

    # The first row has no predecessor, so the comparison against the
    # shifted (missing) value marks it as a start event as well.
    cases = events[case_col]
    is_start = (cases != cases.shift(1)).fillna(True).astype(bool)

    # Non-start events: whole minutes since the previous event.
    # total_seconds() // 60 replaces astype('timedelta64[m]'), which pandas
    # 2.x no longer accepts, while producing the same float values.
    timestamps = events[time_col]
    minutes_since_previous = (timestamps - timestamps.shift(1)).dt.total_seconds() // 60

    # Start events: whole days since the beginning of the log, divided by 7.
    # The value is interpreted in epsilon_estimation_start_timestamp.py.
    min_timestamp = timestamps.min()
    weeks_since_log_start = (timestamps - min_timestamp).dt.days / 7

    events['relative_time'] = minutes_since_previous.where(~is_start, weeks_since_log_start)
    events['start_event'] = is_start
    return events