Skip to content

Commit 859a4cf

Browse files
authored
[FIX] exclude zero_ op impact on benchmark (flagos-ai#1896)
1 parent de56894 commit 859a4cf

1 file changed

Lines changed: 11 additions & 4 deletions

File tree

benchmark/performance_utils.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -404,10 +404,17 @@ def run(self):
404404
self.gems_op, *args, **kwargs
405405
)
406406
else:
407-
with flag_gems.use_gems():
408-
metric.latency = self.get_latency(
409-
self.torch_op, *args, **kwargs
410-
)
407+
if self.op_name == "zero_":
408+
with flag_gems.use_gems():
409+
metric.latency = self.get_latency(
410+
self.torch_op, *args, **kwargs
411+
)
412+
else:
413+
# exclude flaggems' zero_ to avoid the overhead of zero_ in do_bench's clear_cache
414+
with flag_gems.use_gems(exclude=["zero_"]):
415+
metric.latency = self.get_latency(
416+
self.torch_op, *args, **kwargs
417+
)
411418
if "speedup" in self.to_bench_metrics:
412419
metric.speedup = metric.latency_base / metric.latency
413420
if "gbps" in self.to_bench_metrics:

0 commit comments

Comments
 (0)