-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathauto_tuning.py
More file actions
66 lines (53 loc) · 1.93 KB
/
auto_tuning.py
File metadata and controls
66 lines (53 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import tempfile
import tvm
from tvm import meta_schedule as ms
from tvm import tir
from tvm.meta_schedule.space_generator import ScheduleFn
from evaluate import test_numerical_correctness
from gemm_relu_add import gemm_relu_add
def auto_tuning_schedule(sch: tir.Schedule) -> tir.Schedule:
"""The function that defines the schedule space for automatic tuning.
Parameters
----------
sch : tir.Schedule
An empty schedule of the GeMM + ReLU + add workload.
Returns
-------
sch : tir.Schedule
The updated schedule of the GeMM + ReLU + add workload.
"""
"""TODO: Your code here"""
# NOTE: You may need to set argument `preserve_unit_loops=True`
# in `compute_at` and `reverse_compute_at` to make it work
# with auto tuning.
...
return sch
def auto_tune():
    """Tune the GeMM + ReLU + add workload and validate the best schedule.

    Runs meta-schedule tuning over the space defined by
    `auto_tuning_schedule`, compiles the best record found, prints the
    optimized module and its trace, then checks numerical correctness.
    """
    # CUDA target description; the resource limits bound the legal schedules.
    cuda_target = tvm.target.Target(
        {
            "kind": "cuda",
            "max_shared_memory_per_block": 49152,
            "max_threads_per_block": 1024,
            "thread_warp_size": 32,
        }
    )
    with tempfile.TemporaryDirectory() as tuning_dir:
        # Explore the search space; every evaluated schedule is recorded
        # in the returned database.
        record_db = ms.tir_integration.tune_tir(
            mod=gemm_relu_add,
            target=cuda_target,
            work_dir=tuning_dir,
            max_trials_global=64,  # We try 64 schedules in the search space.
            num_trials_per_iter=32,
            space=ScheduleFn(sch_fn=auto_tuning_schedule),
        )
        # Pull the best-performing schedule back out of the database.
        best_sch = ms.tir_integration.compile_tir(record_db, gemm_relu_add, cuda_target)
        assert best_sch is not None, "No valid schedule found!"
        # Show the optimized TIR function and the trace that produced it.
        best_sch.mod.show()
        best_sch.trace.show()
        # Compare the tuned kernel's output against the reference.
        test_numerical_correctness(best_sch)
# Entry point: run the full auto-tuning pipeline when executed as a script.
if __name__ == "__main__":
    auto_tune()