
Commit 09f6d2e

update

Signed-off-by: Chenfei Zhang <chenfeiz@nvidia.com>
1 parent e31d8d7

2 files changed: 125 additions & 2 deletions

tests/integration/defs/perf/open_search_db_utils.py

Lines changed: 37 additions & 1 deletion
@@ -54,6 +54,15 @@ def is_empty(value):
     for field in match_keys:
         history_value = history_data.get(field, None)
         new_value = new_data.get(field, None)
+        # For boolean fields (b_ prefix), treat None/missing as False.
+        # This ensures backward compatibility when new boolean match keys
+        # are added; historical data without the field can still match
+        # current data where the field defaults to False.
+        if field.startswith("b_"):
+            if history_value is None:
+                history_value = False
+            if new_value is None:
+                new_value = False
         if is_empty(history_value) and is_empty(new_value):
             continue
         if history_value != new_value:
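To make the defaulting rule concrete, here is a minimal, self-contained sketch of the intended matching semantics (the record contents are invented; only the b_ naming convention comes from this change):

# Sketch: a historical record written before the boolean key existed still
# matches a current record where the key defaults to False.
history_data = {"s_backend": "trt"}  # no b_use_nv_sa_benchmark field
new_data = {"s_backend": "trt", "b_use_nv_sa_benchmark": False}

for field in ("s_backend", "b_use_nv_sa_benchmark"):
    history_value = history_data.get(field, None)
    new_value = new_data.get(field, None)
    if field.startswith("b_"):
        history_value = False if history_value is None else history_value
        new_value = False if new_value is None else new_value
    assert history_value == new_value  # the two records compare as equal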
@@ -138,7 +147,34 @@ def parse_timestamp(timestamp):
         },
     ]
     for key, value in common_values_dict.items():
-        must_clauses.append({"term": {key: value}})
+        if key.startswith("b_") and value is False:
+            # For boolean fields with value False, also match documents
+            # where the field is missing (backward compatibility for
+            # newly added boolean match keys).
+            must_clauses.append({
+                "bool": {
+                    "should": [
+                        {
+                            "term": {
+                                key: False
+                            }
+                        },
+                        {
+                            "bool": {
+                                "must_not": [{
+                                    "exists": {
+                                        "field": key
+                                    }
+                                }]
+                            }
+                        },
+                    ],
+                    "minimum_should_match": 1,
+                }
+            })
+        else:
+            must_clauses.append({"term": {key: value}})
     history_data_list = OpenSearchDB.queryPerfDataFromOpenSearchDB(
         TEST_INFO_PROJECT_NAME, must_clauses, size=MAX_QUERY_SIZE)
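For reference, the clause this branch appends for a hypothetical key b_use_nv_sa_benchmark with value False is equivalent to the following, written out flat. A document matches if the field is explicitly False or absent entirely:

# Illustrative clause for key="b_use_nv_sa_benchmark", value=False.
clause = {
    "bool": {
        "should": [
            {"term": {"b_use_nv_sa_benchmark": False}},
            {"bool": {"must_not": [{"exists": {"field": "b_use_nv_sa_benchmark"}}]}},
        ],
        "minimum_should_match": 1,  # either branch alone is enough to match
    }
}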

tests/integration/defs/perf/test_perf_sanity.py

Lines changed: 88 additions & 1 deletion
@@ -18,6 +18,7 @@
 import glob
 import os
 import re
+import shutil
 import socket
 import subprocess
 import time
@@ -60,6 +61,36 @@
     "H200": "h200",
 }
 
+BENCH_SERVING_REPO = "https://github.com/kedarpotdar-nv/bench_serving.git"
+BENCH_SERVING_COMMIT = "f3ea022a5780de5d0babc5fffa53634e2023d28f"
+BENCH_SERVING_DIR = "/tmp/bench_serving"
+
+
+def ensure_bench_serving_repo() -> str:
+    """Clone bench_serving repo if not already present. Returns path to benchmark_serving.py."""
+    bench_script = os.path.join(BENCH_SERVING_DIR, "benchmark_serving.py")
+    if not os.path.exists(bench_script):
+        if os.path.exists(BENCH_SERVING_DIR):
+            shutil.rmtree(BENCH_SERVING_DIR)
+        subprocess.check_call(
+            ["git", "clone", "--depth", "1", BENCH_SERVING_REPO, BENCH_SERVING_DIR]
+        )
+        subprocess.check_call(
+            [
+                "git",
+                "-C",
+                BENCH_SERVING_DIR,
+                "fetch",
+                "--depth",
+                "1",
+                "origin",
+                BENCH_SERVING_COMMIT,
+            ]
+        )
+        subprocess.check_call(["git", "-C", BENCH_SERVING_DIR, "checkout", BENCH_SERVING_COMMIT])
+    return bench_script
+
+
 DEFAULT_TIMEOUT = 5400
 AGG_CONFIG_FOLDER = os.environ.get("AGG_CONFIG_FOLDER", "tests/scripts/perf-sanity/aggregated")
 DISAGG_CONFIG_FOLDER = os.environ.get(
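Two properties of the helper are worth noting: a depth-1 clone only brings the default branch tip, which is why the pinned commit is fetched explicitly before checkout, and the existence check on benchmark_serving.py makes repeated calls cheap. A usage sketch (hypothetical caller; assumes git is on PATH and the repo is reachable):

# The first call clones and pins the commit; the second is a no-op
# because benchmark_serving.py already exists on disk.
first = ensure_bench_serving_repo()
second = ensure_bench_serving_repo()
assert first == second == "/tmp/bench_serving/benchmark_serving.py"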
@@ -439,6 +470,7 @@ def __init__(
         self.trust_remote_code = client_config_data.get("trust_remote_code", True)
         self.model_path = ""
         self.dataset_file = client_config_data.get("dataset_file", "")
+        self.use_nv_sa_benchmark = client_config_data.get("use_nv_sa_benchmark", False)
         self.env_vars = env_vars
 
         # Generate default name if not provided
@@ -450,6 +482,48 @@ def to_cmd(self) -> List[str]:
         """Generate benchmark command."""
         model_dir = get_model_dir(self.model_name)
         self.model_path = model_dir if os.path.exists(model_dir) else self.model_name
+
+        if self.use_nv_sa_benchmark:
+            return self._to_sa_benchmark_cmd()
+        else:
+            return self._to_default_benchmark_cmd()
+
+    def _to_sa_benchmark_cmd(self) -> List[str]:
+        """Generate SA benchmark command (bench_serving repo)."""
+        bench_script = ensure_bench_serving_repo()
+        benchmark_cmd = [
+            "python",
+            bench_script,
+            "--model",
+            self.model_path,
+            "--dataset-name",
+            "random",
+            "--num-prompts",
+            str(self.concurrency * self.iterations),
+            "--max-concurrency",
+            str(self.concurrency),
+            "--ignore-eos",
+            "--random-input-len",
+            str(self.isl),
+            "--random-output-len",
+            str(self.osl),
+            "--random-range-ratio",
+            str(self.random_range_ratio),
+            "--save-result",
+            "--percentile-metrics",
+            "ttft,tpot,itl,e2el",
+        ]
+        if self.backend:
+            benchmark_cmd.extend(["--backend", self.backend])
+        if self.trust_remote_code:
+            benchmark_cmd.append("--trust-remote-code")
+        if self.use_chat_template:
+            benchmark_cmd.append("--use-chat-template")
+        # Note: bench_serving has no --non-streaming flag; streaming is backend-determined.
+        return benchmark_cmd
+
+    def _to_default_benchmark_cmd(self) -> List[str]:
+        """Generate default benchmark command (tensorrt_llm benchmark_serving)."""
         dataset_path = get_dataset_dir(self.dataset_file)
         benchmark_cmd = [
             "python",
@@ -513,6 +587,7 @@ def to_match_keys(self) -> List[str]:
             "s_backend",
             "b_use_chat_template",
             "b_streaming",
+            "b_use_nv_sa_benchmark",
         ]
 
     def to_db_data(self) -> dict:
@@ -529,6 +604,7 @@ def to_db_data(self) -> dict:
             "b_use_chat_template": self.use_chat_template,
             "b_streaming": self.streaming,
             "b_trust_remote_code": self.trust_remote_code,
+            "b_use_nv_sa_benchmark": self.use_nv_sa_benchmark,
             "s_client_log_link": "",
             "s_client_env_vars": self.env_vars,
         }
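Taken together with the open_search_db_utils.py changes above, registering b_use_nv_sa_benchmark as both a stored field and a match key is what makes the two backward-compatibility paths (None-as-False comparison, and the term-or-missing query clause) necessary: historical records written before this field existed must still match new runs where it is False.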
@@ -1292,6 +1368,7 @@ def _parse_disagg_config_file(self, config_file_path: str, config_file: str):
         # For ctx_only: OSL is set to 1 and dataset_file is empty
         osl = 1 if benchmark_mode == "ctx_only" else benchmark.get("output_length", 1024)
         dataset_file = "" if benchmark_mode == "ctx_only" else benchmark.get("dataset_file", "")
+        use_nv_sa_benchmark = benchmark.get("use_nv_sa_benchmark", False)
 
         client_configs = []
         for concurrency in concurrency_values:
@@ -1305,6 +1382,7 @@ def _parse_disagg_config_file(self, config_file_path: str, config_file: str):
             "use_chat_template": False,
             "streaming": benchmark.get("streaming", True),
             "dataset_file": dataset_file,
+            "use_nv_sa_benchmark": use_nv_sa_benchmark,
         }
         client_config = ClientConfig(
             client_config_data,
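A hypothetical benchmark entry exercising the new flag might look like this, showing only keys that _parse_disagg_config_file reads above (all values invented):

# Hypothetical benchmark entry as parsed from a disagg config file.
benchmark = {
    "output_length": 1024,
    "dataset_file": "synthetic_1k.json",  # forced to "" when benchmark_mode == "ctx_only"
    "streaming": True,
    "use_nv_sa_benchmark": True,          # route this client through bench_serving
}
use_nv_sa_benchmark = benchmark.get("use_nv_sa_benchmark", False)  # -> True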
@@ -1478,8 +1556,17 @@ def parse_metrics_from_output(output: str) -> Optional[Dict[str, float]]:
         for server_idx, client_configs in self.server_client_configs.items():
             self._perf_results[server_idx] = []
             server_outputs = outputs.get(server_idx, [])
-            for output in server_outputs:
+            for client_idx, output in enumerate(server_outputs):
                 metrics = parse_metrics_from_output(output)
+                # SA benchmark (bench_serving) doesn't report user_throughput.
+                # Use None as a sentinel to distinguish "not available" from actual zero.
+                if (
+                    metrics
+                    and "user_throughput" not in metrics
+                    and client_idx < len(client_configs)
+                    and client_configs[client_idx].use_nv_sa_benchmark
+                ):
+                    metrics["user_throughput"] = None
                 self._perf_results[server_idx].append(metrics)
 
     def check_test_failure(self):
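Consumers of the collected results therefore need a three-way distinction; a minimal sketch of a hypothetical reader (not part of this commit; perf_results stands in for one server's list of parsed metrics):

# None metrics: the client output was unparseable. user_throughput is None:
# the SA benchmark ran but does not report that metric. Otherwise: a real value.
for metrics in perf_results:
    if metrics is None:
        print("no parseable metrics for this client")
    elif metrics.get("user_throughput") is None:
        print("user_throughput unavailable (SA benchmark run)")
    else:
        print(f"user_throughput: {metrics['user_throughput']:.2f}")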
