-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathtask_processor.py
More file actions
215 lines (175 loc) · 5.93 KB
/
task_processor.py
File metadata and controls
215 lines (175 loc) · 5.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import os
import re
import glob
import json
# Set to 1 to run the ad-hoc smoke test in the __main__ block at the bottom.
TESTING = 0
# When True, log() suppresses all output; toggled via the `silent` arguments
# of get_task_data_paths() and get_task_data().
SILENT = False
def log(msg):
    """Print *msg* unless module-level silent mode is enabled."""
    # Reading a module-level name requires no `global` declaration; the
    # original's `global SILENT` statement here was redundant (it is only
    # needed where SILENT is assigned, e.g. in get_task_data_paths).
    if not SILENT:
        print(msg)
def pattern_match(name, artifacts_to_get):
    """
    Match an artifact that was requested with the name we have.

    Returns the first entry of *artifacts_to_get* that occurs as a
    substring of *name*, or None when nothing matches (or when the
    request list is empty/None).
    """
    if not artifacts_to_get:
        return None
    # Lazily scan the requested names; stop at the first substring hit.
    return next(
        (candidate for candidate in artifacts_to_get if candidate in name),
        None,
    )
def sorted_nicely(data):
    """
    Sort the given iterable the way humans expect, treating runs of
    digits as numbers (so "run2" sorts before "run10").
    """
    def _natural_key(text):
        # Split on digit runs; compare numeric chunks as ints and the
        # rest as plain strings.
        return [
            int(chunk) if chunk.isdigit() else chunk
            for chunk in re.split("([0-9]+)", text)
        ]

    return sorted(data, key=_natural_key)
def match_vismets_with_videos(task_group_id, path, vismet_task_ids):
    """
    Returns a mapping from vismet task IDs to the videos.

    Reads `<path>/<task_group_id>/task-group-information.json` and, for
    each requested vismet task, finds the browsertime task whose
    "browsertime-results" artifact it fetched via MOZ_FETCHES.

    :param task_group_id: Task group folder name under *path*.
    :param path: Root directory holding one folder per task group.
    :param vismet_task_ids: Iterable of vismet task IDs to resolve.
    :return: dict mapping each vismet task ID to the producing task's
        ID, or None for IDs that could not be matched.
    """
    task_dir = os.path.join(path, task_group_id)
    taskgraph_json = os.path.join(task_dir, "task-group-information.json")
    with open(taskgraph_json) as f:
        taskgraph = json.load(f)

    # Start every requested ID at None so unmatched tasks stay visible.
    mapping = {task_id: None for task_id in vismet_task_ids}
    for task in taskgraph:
        task_id = task.get("status", {}).get("taskId", "")
        if task_id not in mapping:
            continue
        # The video source is the browsertime-results artifact listed in
        # the vismet task's MOZ_FETCHES environment variable.
        vismet_fetches = json.loads(task["task"]["payload"]["env"]["MOZ_FETCHES"])
        for fetch in vismet_fetches:
            if "browsertime-results" in fetch["artifact"]:
                mapping[task_id] = fetch["task"]
                break
        # BUGFIX: the original tested `all(mapping)`, which iterates the
        # dict KEYS (always-truthy task-ID strings) and therefore broke
        # out after the very first matched task, leaving every later
        # vismet ID unresolved. Check the values instead, so we only
        # stop early once every requested ID has been matched.
        if all(mapping.values()):
            break
    return mapping
def get_task_data_paths(
    task_group_id,
    path,
    run_number=None,
    artifact=(),
    artifact_dir="",
    suite_matcher="",
    silent=False,
):
    """
    Opens a folder for a task group and returns the files
    contained within it.

    :param task_group_id: Task group folder name under *path*.
    :param path: Root directory holding one folder per task group.
    :param run_number: Run sub-folder to read. Defaults to the highest
        numeric sub-folder found under the task directory.
    :param artifact: Artifact name, or list of names, to collect
        (substring match on directory and file names). The default is
        an immutable () — the original used a mutable [] default, a
        classic Python trap.
    :param artifact_dir: Optional explicit name of the per-suite data
        directory; when empty, directories matching the artifact name
        or ending in "_data" are used.
    :param suite_matcher: Only suites whose name contains this string
        are searched.
    :param silent: Suppress log output.
    :return: dict mapping suite name -> naturally sorted list of file
        paths. Returns {} (the original returned None) when the task
        directory does not exist, so callers can always iterate.
    """
    global SILENT
    SILENT = silent

    # Accept a single artifact name or a sequence of names.
    if not isinstance(artifact, (list, tuple)):
        artifact = [artifact]

    data = {}

    # Get the directory to search.
    task_dir = os.path.join(path, task_group_id)
    if not os.path.exists(task_dir):
        log("Cannot open task directory: %s" % task_dir)
        # BUGFIX: return an empty dict instead of None so callers such
        # as get_task_data can iterate the result without crashing.
        return data

    if run_number is None:
        # Use the highest numeric sub-directory as the latest run.
        # No os.chdir dance needed: walk task_dir directly. Non-numeric
        # entries are skipped instead of raising ValueError as before.
        max_num = 0
        for subdir in next(os.walk(task_dir))[1]:
            if subdir.isdigit():
                max_num = max(max_num, int(subdir))
        run_number = max_num
        log("No run number supplied. Using the latest one, run number %s" % run_number)

    run_dir = os.path.join(task_dir, str(run_number))
    all_suites = [
        f for f in os.listdir(run_dir) if os.path.isdir(os.path.join(run_dir, f))
    ]

    # Find all the data for this task group.
    for suite in all_suites:
        # Loop-invariant: hoisted out of the per-artifact loop.
        if suite_matcher and suite_matcher not in suite:
            continue
        suite_dir = os.path.join(run_dir, suite)
        for aname in artifact:
            # Get the suite's data directory. BUGFIX: the original
            # assigned `artifact_dir = aname` here, mutating the
            # parameter; that leaked state into later suites/artifacts
            # and made the `_data`-suffix fallback unreachable. Leave
            # the parameter untouched so the fallback works as intended.
            all_dirs = [
                f
                for f in os.listdir(suite_dir)
                if os.path.isdir(os.path.join(suite_dir, f))
            ]
            suite_data_dir = None
            for d in all_dirs:
                if pattern_match(d, [aname]) or (
                    not artifact_dir and d.endswith("_data")
                ):
                    suite_data_dir = os.path.join(suite_dir, d)
                    break
            if not suite_data_dir:
                log("Cannot find data directory in %s, skipping" % suite_dir)
                continue

            # Now find all matching data files and order them naturally.
            all_files = glob.glob(os.path.join(suite_data_dir, "**/*"), recursive=True)
            all_files = [
                fpath
                for fpath in all_files
                if pattern_match(os.path.split(fpath)[-1], [aname])
            ]
            data.setdefault(suite, []).extend(all_files)
            data[suite] = sorted_nicely(data[suite])

    return data
def get_task_data(
    task_group_id, path, run_number=None, artifact="", suite_matcher="", silent=False
):
    """
    Get the task data paths and opens the data into
    a detected file format. By default, when an unknown file
    format is encountered, the lines will be read and returned.

    :param task_group_id: Task group folder name under *path*.
    :param path: Root directory holding one folder per task group.
    :param run_number: Run sub-folder to read (latest when None).
    :param artifact: Artifact name(s) to collect; passed through to
        get_task_data_paths.
    :param suite_matcher: Only suites containing this string are read.
    :param silent: Suppress log output.
    :return: dict mapping suite name -> list of
        {"data": parsed_content, "file": path} entries. JSON files are
        parsed; anything else is returned as a list of raw lines.
    """
    global SILENT
    SILENT = silent

    data = {}
    # "or {}" guards against a None result when the task directory is
    # missing, instead of crashing on .items() below.
    data_paths = get_task_data_paths(
        task_group_id,
        path,
        run_number=run_number,
        artifact=artifact,
        suite_matcher=suite_matcher,
        silent=silent,
    ) or {}
    for suite, paths in data_paths.items():
        data[suite] = []
        # BUGFIX: the original loop variable was named `path`, shadowing
        # the function parameter of the same name.
        for file_path in paths:
            log("Opening %s..." % file_path)
            if file_path.endswith(".json"):
                with open(file_path, "r") as f:
                    tmpdata = json.load(f)
            else:
                with open(file_path, "r") as f:
                    tmpdata = f.readlines()
            data[suite].append({"data": tmpdata, "file": file_path})
    return data
if __name__ == "__main__":
    # Ad-hoc smoke test against local data; only runs when the TESTING
    # flag at the top of the module is enabled.
    if TESTING:
        test_group = "SssyewAFQiKm40PIouxo_g"
        test_path = "/home/sparky/mozilla-source/analysis-scripts/perfunct-testing-data"

        data = get_task_data_paths(
            test_group,
            test_path,
            artifact="perfherder-data",
            run_number="4",
        )
        print(json.dumps(data, indent=4))

        data = get_task_data(
            test_group,
            test_path,
            artifact="perfherder-data",
            run_number="4",
        )