We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent de56894, commit 859a4cf (copy full SHA for 859a4cf)
1 file changed
benchmark/performance_utils.py
@@ -404,10 +404,17 @@ def run(self):
404
self.gems_op, *args, **kwargs
405
)
406
else:
407
- with flag_gems.use_gems():
408
- metric.latency = self.get_latency(
409
- self.torch_op, *args, **kwargs
410
- )
+ if self.op_name == "zero_":
+ with flag_gems.use_gems():
+ metric.latency = self.get_latency(
+ self.torch_op, *args, **kwargs
411
+ )
412
+ else:
413
+ # exclude flaggems' zero_ to avoid the overhead of zero_ in do_bench's clear_cache
414
+ with flag_gems.use_gems(exclude=["zero_"]):
415
416
417
418
if "speedup" in self.to_bench_metrics:
419
metric.speedup = metric.latency_base / metric.latency
420
if "gbps" in self.to_bench_metrics:
0 commit comments