Skip to content

Commit 928d56d

Browse files
committed
Address CodeRabbitAI review comments for SA benchmark
- Pin bench_serving repo to commit f3ea022a with shallow clone for supply-chain security (addresses S108/S603/S607 ruff warnings) - Make --trust-remote-code, --use-chat-template, --backend conditional in _to_sa_benchmark_cmd() to match _to_default_benchmark_cmd() style - Use None instead of 0.0 as user_throughput placeholder to distinguish "metric not available" from actual zero throughput
1 parent e051f24 commit 928d56d

1 file changed

Lines changed: 27 additions & 5 deletions

File tree

tests/integration/defs/perf/test_perf_sanity.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
}
6363

6464
BENCH_SERVING_REPO = "https://github.com/kedarpotdar-nv/bench_serving.git"
65+
BENCH_SERVING_COMMIT = "f3ea022a5780de5d0babc5fffa53634e2023d28f"
6566
BENCH_SERVING_DIR = "/tmp/bench_serving"
6667

6768

@@ -71,7 +72,22 @@ def ensure_bench_serving_repo() -> str:
7172
if not os.path.exists(bench_script):
7273
if os.path.exists(BENCH_SERVING_DIR):
7374
shutil.rmtree(BENCH_SERVING_DIR)
74-
subprocess.check_call(["git", "clone", BENCH_SERVING_REPO, BENCH_SERVING_DIR])
75+
subprocess.check_call(
76+
["git", "clone", "--depth", "1", BENCH_SERVING_REPO, BENCH_SERVING_DIR]
77+
)
78+
subprocess.check_call(
79+
[
80+
"git",
81+
"-C",
82+
BENCH_SERVING_DIR,
83+
"fetch",
84+
"--depth",
85+
"1",
86+
"origin",
87+
BENCH_SERVING_COMMIT,
88+
]
89+
)
90+
subprocess.check_call(["git", "-C", BENCH_SERVING_DIR, "checkout", BENCH_SERVING_COMMIT])
7591
return bench_script
7692

7793

@@ -486,7 +502,6 @@ def _to_sa_benchmark_cmd(self) -> List[str]:
486502
str(self.concurrency * self.iterations),
487503
"--max-concurrency",
488504
str(self.concurrency),
489-
"--trust-remote-code",
490505
"--ignore-eos",
491506
"--random-input-len",
492507
str(self.isl),
@@ -495,10 +510,16 @@ def _to_sa_benchmark_cmd(self) -> List[str]:
495510
"--random-range-ratio",
496511
str(self.random_range_ratio),
497512
"--save-result",
498-
"--use-chat-template",
499513
"--percentile-metrics",
500514
"ttft,tpot,itl,e2el",
501515
]
516+
if self.backend:
517+
benchmark_cmd.extend(["--backend", self.backend])
518+
if self.trust_remote_code:
519+
benchmark_cmd.append("--trust-remote-code")
520+
if self.use_chat_template:
521+
benchmark_cmd.append("--use-chat-template")
522+
# Note: bench_serving has no --non-streaming flag; streaming is backend-determined
502523
return benchmark_cmd
503524

504525
def _to_default_benchmark_cmd(self) -> List[str]:
@@ -1537,14 +1558,15 @@ def parse_metrics_from_output(output: str) -> Optional[Dict[str, float]]:
15371558
server_outputs = outputs.get(server_idx, [])
15381559
for client_idx, output in enumerate(server_outputs):
15391560
metrics = parse_metrics_from_output(output)
1540-
# SA benchmark (bench_serving) doesn't output user_throughput
1561+
# SA benchmark (bench_serving) doesn't report user_throughput.
1562+
# Use None as sentinel to distinguish "not available" from actual zero.
15411563
if (
15421564
metrics
15431565
and "user_throughput" not in metrics
15441566
and client_idx < len(client_configs)
15451567
and client_configs[client_idx].use_nv_sa_benchmark
15461568
):
1547-
metrics["user_throughput"] = 0.0
1569+
metrics["user_throughput"] = None
15481570
self._perf_results[server_idx].append(metrics)
15491571

15501572
def check_test_failure(self):

0 commit comments

Comments
 (0)