@@ -18,6 +18,7 @@
 import glob
 import os
 import re
+import shutil
 import socket
 import subprocess
 import time
6061 "H200" : "h200" ,
6162}
6263
64+ BENCH_SERVING_REPO = "https://github.com/kedarpotdar-nv/bench_serving.git"
65+ BENCH_SERVING_COMMIT = "f3ea022a5780de5d0babc5fffa53634e2023d28f"
66+ BENCH_SERVING_DIR = "/tmp/bench_serving"
67+
68+
69+ def ensure_bench_serving_repo () -> str :
70+ """Clone bench_serving repo if not already present. Returns path to benchmark_serving.py."""
71+ bench_script = os .path .join (BENCH_SERVING_DIR , "benchmark_serving.py" )
72+ if not os .path .exists (bench_script ):
73+ if os .path .exists (BENCH_SERVING_DIR ):
74+ shutil .rmtree (BENCH_SERVING_DIR )
75+ subprocess .check_call (
76+ ["git" , "clone" , "--depth" , "1" , BENCH_SERVING_REPO , BENCH_SERVING_DIR ]
77+ )
78+ subprocess .check_call (
79+ [
80+ "git" ,
81+ "-C" ,
82+ BENCH_SERVING_DIR ,
83+ "fetch" ,
84+ "--depth" ,
85+ "1" ,
86+ "origin" ,
87+ BENCH_SERVING_COMMIT ,
88+ ]
89+ )
90+ subprocess .check_call (["git" , "-C" , BENCH_SERVING_DIR , "checkout" , BENCH_SERVING_COMMIT ])
91+ return bench_script
92+
93+
6394DEFAULT_TIMEOUT = 5400
6495AGG_CONFIG_FOLDER = os .environ .get ("AGG_CONFIG_FOLDER" , "tests/scripts/perf-sanity/aggregated" )
6596DISAGG_CONFIG_FOLDER = os .environ .get (
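Because the checkout is pinned, a quick sanity check is to compare git's view of HEAD against the pin. A minimal sketch; checked_out_commit is a hypothetical helper, not part of this change:

import subprocess

def checked_out_commit(repo_dir: str) -> str:
    # Resolve the commit currently checked out in repo_dir.
    return subprocess.check_output(
        ["git", "-C", repo_dir, "rev-parse", "HEAD"], text=True
    ).strip()

# Usage, assuming ensure_bench_serving_repo() has already run:
# assert checked_out_commit(BENCH_SERVING_DIR) == BENCH_SERVING_COMMIT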
@@ -439,6 +470,7 @@ def __init__(
         self.trust_remote_code = client_config_data.get("trust_remote_code", True)
         self.model_path = ""
         self.dataset_file = client_config_data.get("dataset_file", "")
+        self.use_nv_sa_benchmark = client_config_data.get("use_nv_sa_benchmark", False)
         self.env_vars = env_vars
 
         # Generate default name if not provided
@@ -450,6 +482,48 @@ def to_cmd(self) -> List[str]:
         """Generate benchmark command."""
         model_dir = get_model_dir(self.model_name)
         self.model_path = model_dir if os.path.exists(model_dir) else self.model_name
+
+        if self.use_nv_sa_benchmark:
+            return self._to_sa_benchmark_cmd()
+        else:
+            return self._to_default_benchmark_cmd()
+
+    def _to_sa_benchmark_cmd(self) -> List[str]:
+        """Generate SA benchmark command (bench_serving repo)."""
+        bench_script = ensure_bench_serving_repo()
+        benchmark_cmd = [
+            "python",
+            bench_script,
+            "--model",
+            self.model_path,
+            "--dataset-name",
+            "random",
+            "--num-prompts",
+            str(self.concurrency * self.iterations),
+            "--max-concurrency",
+            str(self.concurrency),
+            "--ignore-eos",
+            "--random-input-len",
+            str(self.isl),
+            "--random-output-len",
+            str(self.osl),
+            "--random-range-ratio",
+            str(self.random_range_ratio),
+            "--save-result",
+            "--percentile-metrics",
+            "ttft,tpot,itl,e2el",
+        ]
+        if self.backend:
+            benchmark_cmd.extend(["--backend", self.backend])
+        if self.trust_remote_code:
+            benchmark_cmd.append("--trust-remote-code")
+        if self.use_chat_template:
+            benchmark_cmd.append("--use-chat-template")
+        # Note: bench_serving has no --non-streaming flag; streaming is backend-determined.
+        return benchmark_cmd
+
+    def _to_default_benchmark_cmd(self) -> List[str]:
+        """Generate default benchmark command (tensorrt_llm benchmark_serving)."""
         dataset_path = get_dataset_dir(self.dataset_file)
         benchmark_cmd = [
             "python",
@@ -513,6 +587,7 @@ def to_match_keys(self) -> List[str]:
             "s_backend",
             "b_use_chat_template",
             "b_streaming",
+            "b_use_nv_sa_benchmark",
         ]
 
     def to_db_data(self) -> dict:
@@ -529,6 +604,7 @@ def to_db_data(self) -> dict:
             "b_use_chat_template": self.use_chat_template,
             "b_streaming": self.streaming,
             "b_trust_remote_code": self.trust_remote_code,
+            "b_use_nv_sa_benchmark": self.use_nv_sa_benchmark,
             "s_client_log_link": "",
             "s_client_env_vars": self.env_vars,
         }
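Adding "b_use_nv_sa_benchmark" to both the DB record and the match keys means two otherwise-identical runs that differ only in benchmark harness are never compared against each other. A rough sketch of how the match keys might be applied downstream (hypothetical consumer code, not part of this diff):

db_data = client_config.to_db_data()
match_fields = {key: db_data.get(key) for key in client_config.to_match_keys()}
# match_fields now carries "b_use_nv_sa_benchmark", so SA-benchmark results
# are only matched against baselines produced by the same harness.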
@@ -1292,6 +1368,7 @@ def _parse_disagg_config_file(self, config_file_path: str, config_file: str):
         # For ctx_only: OSL is set to 1 and dataset_file is empty
         osl = 1 if benchmark_mode == "ctx_only" else benchmark.get("output_length", 1024)
         dataset_file = "" if benchmark_mode == "ctx_only" else benchmark.get("dataset_file", "")
+        use_nv_sa_benchmark = benchmark.get("use_nv_sa_benchmark", False)
 
         client_configs = []
         for concurrency in concurrency_values:
@@ -1305,6 +1382,7 @@ def _parse_disagg_config_file(self, config_file_path: str, config_file: str):
                 "use_chat_template": False,
                 "streaming": benchmark.get("streaming", True),
                 "dataset_file": dataset_file,
+                "use_nv_sa_benchmark": use_nv_sa_benchmark,
             }
             client_config = ClientConfig(
                 client_config_data,
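The flag is read straight off each benchmark entry in the disagg config, defaulting to False. A sketch of the parsed mapping with the new flag enabled; keys other than use_nv_sa_benchmark, output_length, dataset_file, and streaming are assumptions and may differ from the real config schema:

# Illustrative parsed benchmark entry (assumed shape, not copied from a real config):
benchmark = {
    "output_length": 1024,
    "dataset_file": "my_dataset.json",  # forced to "" when benchmark_mode == "ctx_only"
    "streaming": True,
    "use_nv_sa_benchmark": True,
}
use_nv_sa_benchmark = benchmark.get("use_nv_sa_benchmark", False)  # -> True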
@@ -1478,8 +1556,17 @@ def parse_metrics_from_output(output: str) -> Optional[Dict[str, float]]:
         for server_idx, client_configs in self.server_client_configs.items():
             self._perf_results[server_idx] = []
             server_outputs = outputs.get(server_idx, [])
-            for output in server_outputs:
+            for client_idx, output in enumerate(server_outputs):
                 metrics = parse_metrics_from_output(output)
+                # SA benchmark (bench_serving) doesn't report user_throughput.
+                # Use None as a sentinel to distinguish "not available" from an actual zero.
+                if (
+                    metrics
+                    and "user_throughput" not in metrics
+                    and client_idx < len(client_configs)
+                    and client_configs[client_idx].use_nv_sa_benchmark
+                ):
+                    metrics["user_throughput"] = None
                 self._perf_results[server_idx].append(metrics)
 
     def check_test_failure(self):
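Anything that later reads user_throughput has to handle the None sentinel explicitly, since formatting or arithmetic on None would raise a TypeError. A minimal sketch of a defensive consumer (hypothetical helper, not part of this diff):

def format_user_throughput(metrics: dict) -> str:
    # None means "not reported by this benchmark", distinct from a measured 0.0.
    value = metrics.get("user_throughput")
    if value is None:
        return "n/a"
    return f"{value:.2f}"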