From 7afa9a76b3f074ba5607baec9f84eb2faee60c7e Mon Sep 17 00:00:00 2001 From: Curtis Anderson Date: Tue, 23 Jun 2026 21:33:25 -0700 Subject: [PATCH] fix(kvcache): consolidate per-option workload params in mlpstorage (#498, #500) The mlperf_wrapper.py script was unconditionally injecting WORKLOAD_PARAMS on every kv-cache.py invocation, overriding user --config YAML and CLI flags even in OPEN mode. CLOSED/OPEN gating already lives in mlpstorage_py/cli/kvcache_args.py, so the wrapper-side squashing was both redundant in CLOSED and actively harmful in OPEN. WORKLOAD_PARAMS now lives only in mlpstorage_py.benchmarks.kvcache. KVCacheBenchmark._build_option_kvcache_args emits the per-option flags: verbatim in CLOSED, with user CLI flags superseding per-option defaults in OPEN. max-concurrent-allocs stays at the option's mandated value because it has no OPEN CLI flag. mlperf_wrapper.py becomes a pure rank-aware launcher: --rank-output-base, --rank-cache-base, --seed-base, --start-delay, --end-delay; everything else forwards verbatim to kv-cache.py. Per-rank --seed/--output/--cache-dir are appended last so argparse store-action takes them over forwarded duplicates. allow_abbrev=False prevents a forwarded --seed from collapsing into --seed-base. --- kv_cache_benchmark/mlperf_wrapper.py | 129 +++++++------------- mlpstorage_py/benchmarks/kvcache.py | 95 ++++++++++++++- tests/unit/test_benchmarks_kvcache.py | 167 +++++++++++++++++++++++++- tests/unit/test_mlperf_wrapper.py | 90 ++++++++------ 4 files changed, 347 insertions(+), 134 deletions(-) diff --git a/kv_cache_benchmark/mlperf_wrapper.py b/kv_cache_benchmark/mlperf_wrapper.py index dc3a5296..896616ae 100755 --- a/kv_cache_benchmark/mlperf_wrapper.py +++ b/kv_cache_benchmark/mlperf_wrapper.py @@ -1,10 +1,12 @@ #!/usr/bin/env python3 -"""MPI-rank-aware wrapper for MLPerf KV Cache benchmark. +"""MPI-rank-aware launcher for the MLPerf KV Cache benchmark. Invoked by mpirun per-rank; reads OMPI_COMM_WORLD_RANK (OpenMPI) or PMI_RANK -(MPICH) to determine this rank's index. Computes per-rank seed, output -directory, and cache directory, then invokes kv-cache.py with the fixed MLPerf -v3.0 parameters for the specified option. +(MPICH) to determine this rank's index. The wrapper does NOT encode any +workload parameters — it only computes per-rank values (seed, output file, +cache directory) and forwards everything else to kv-cache.py. The caller +(mlpstorage_py.benchmarks.kvcache.KVCacheBenchmark) owns the per-option +workload parameter set and CLOSED/OPEN enforcement. """ import argparse @@ -18,40 +20,6 @@ BASE_SEED = 42 TEST_DELAY = 90 -# MLPerf v3.0 fixed parameters per option. -# All numeric values stored as int/float; converted to str when building cmd. -# 'generation-mode' is ALWAYS 'none' for MLPerf compliance — do NOT rely on -# kv-cache.py defaults; the default may change in future versions. -WORKLOAD_PARAMS = { - 1: { - 'model': 'llama3.1-8b', - 'num-users': 200, - 'duration': 300, - 'gpu-mem-gb': 0, - 'cpu-mem-gb': 0, - 'max-concurrent-allocs': 16, - 'generation-mode': 'none', - }, - 2: { - 'model': 'llama3.1-8b', - 'num-users': 100, - 'duration': 300, - 'gpu-mem-gb': 0, - 'cpu-mem-gb': 4, - 'max-concurrent-allocs': 16, - 'generation-mode': 'none', - }, - 3: { - 'model': 'llama3.1-70b-instruct', - 'num-users': 70, - 'duration': 300, - 'gpu-mem-gb': 0, - 'cpu-mem-gb': 0, - 'max-concurrent-allocs': 4, - 'generation-mode': 'none', - }, -} - def get_rank() -> int: """Read global MPI rank from environment (no mpi4py). @@ -79,50 +47,57 @@ def get_rank() -> int: def main(): + # allow_abbrev=False so forwarded kv-cache.py flags like --seed are not + # silently swallowed by a prefix match against --seed-base. parser = argparse.ArgumentParser( - description="MLPerf KV Cache MPI-rank-aware wrapper" + description="MLPerf KV Cache MPI-rank-aware launcher", + allow_abbrev=False, ) parser.add_argument( - '--option', - type=int, - choices=[1, 2, 3], + '--rank-output-base', + type=str, required=True, - help="MLPerf v3.0 option (1=Max Storage Stress, 2=Storage Throughput, 3=Large Model 70B).", + dest='rank_output_base', + help="Base output directory. Per-rank results written to /rank_/.", ) parser.add_argument( - '--seed', + '--rank-cache-base', + type=str, + required=True, + dest='rank_cache_base', + help="Base cache directory. Per-rank cache written to /rank_/.", + ) + parser.add_argument( + '--seed-base', type=int, default=BASE_SEED, + dest='seed_base', help=f"Base random seed (default: {BASE_SEED}). Effective seed = base + rank.", ) parser.add_argument( - '--base-output-dir', - type=str, - required=True, - dest='base_output_dir', - help="Base output directory. Per-rank results written to /rank_/.", - ) - parser.add_argument( - '--cache-dir', - type=str, - required=True, - dest='cache_dir', - help="Base cache directory. Per-rank cache written to /rank_/.", + '--start-delay', + type=int, + default=TEST_DELAY, + dest='start_delay', + help=f"Seconds to sleep before invoking kv-cache.py (default: {TEST_DELAY}).", ) parser.add_argument( - '--config', - type=str, - default=None, - help="Path to YAML config file. Defaults to config.yaml adjacent to this script (D-02).", + '--end-delay', + type=int, + default=TEST_DELAY, + dest='end_delay', + help=f"Seconds to sleep after kv-cache.py exits (default: {TEST_DELAY}).", ) - args = parser.parse_args() + # Wrapper-specific flags are consumed; everything else is forwarded to + # kv-cache.py verbatim (including --config, --model, --num-users, etc.). + args, forwarded = parser.parse_known_args() rank = get_rank() - effective_seed = args.seed + rank + effective_seed = args.seed_base + rank - rank_output_dir = Path(args.base_output_dir) / f"rank_{rank}" - rank_cache_dir = Path(args.cache_dir) / f"rank_{rank}" + rank_output_dir = Path(args.rank_output_base) / f"rank_{rank}" + rank_cache_dir = Path(args.rank_cache_base) / f"rank_{rank}" rank_output_dir.mkdir(parents=True, exist_ok=True) rank_cache_dir.mkdir(parents=True, exist_ok=True) @@ -130,38 +105,26 @@ def main(): ts = datetime.now().strftime('%Y%m%d_%H%M%S') output_file = rank_output_dir / f"kvcache_results_{ts}.json" - # D-01: kv-cache.py located relative to this script; both share kv_cache_benchmark/ kvcache_script = Path(__file__).parent / 'kv-cache.py' - # D-02: config.yaml located relative to this script; user may override via --config - config_path = args.config or str(Path(__file__).parent / 'config.yaml') - - params = WORKLOAD_PARAMS[args.option] + # Per-rank seed/output/cache are appended last so argparse store-action + # uses them over any duplicates that came in via --forwarded args. cmd = [ sys.executable, str(kvcache_script), - '--config', config_path, + *forwarded, '--seed', str(effective_seed), '--output', str(output_file), '--cache-dir', str(rank_cache_dir), - '--model', params['model'], - '--num-users', str(params['num-users']), - '--duration', str(params['duration']), - '--gpu-mem-gb', str(params['gpu-mem-gb']), - '--cpu-mem-gb', str(params['cpu-mem-gb']), - '--max-concurrent-allocs', str(params['max-concurrent-allocs']), - # '--generation-mode' is passed EXPLICITLY — do NOT omit even though the - # value is always 'none'. kv-cache.py defaults may change in future versions. - '--generation-mode', params['generation-mode'], ] - print(f"KV Cache Wrapper - Start delay for {TEST_DELAY} seconds") - time.sleep(TEST_DELAY) + print(f"KV Cache Wrapper - Start delay for {args.start_delay} seconds") + time.sleep(args.start_delay) print(f"KV Cache Wrapper - Starting benchmark pass...") result = subprocess.run(cmd) - print(f"KV Cache Wrapper - End delay for {TEST_DELAY} seconds") - time.sleep(TEST_DELAY) + print(f"KV Cache Wrapper - End delay for {args.end_delay} seconds") + time.sleep(args.end_delay) print(f"KV Cache Wrapper - Finished benchmark pass") sys.exit(result.returncode) diff --git a/mlpstorage_py/benchmarks/kvcache.py b/mlpstorage_py/benchmarks/kvcache.py index 42dd423f..06c26919 100755 --- a/mlpstorage_py/benchmarks/kvcache.py +++ b/mlpstorage_py/benchmarks/kvcache.py @@ -33,6 +33,40 @@ from mlpstorage_py.utils import generate_mpi_prefix_cmd, MLPSJsonEncoder +# MLPerf v3.0 fixed per-option workload parameters. Single source of truth for +# what option N means at the kv-cache.py level. In CLOSED these are mandated; +# in OPEN they act as per-option defaults that user CLI flags supersede. +WORKLOAD_PARAMS = { + 1: { + 'model': 'llama3.1-8b', + 'num-users': 200, + 'duration': 300, + 'gpu-mem-gb': 0, + 'cpu-mem-gb': 0, + 'max-concurrent-allocs': 16, + 'generation-mode': 'none', + }, + 2: { + 'model': 'llama3.1-8b', + 'num-users': 100, + 'duration': 300, + 'gpu-mem-gb': 0, + 'cpu-mem-gb': 4, + 'max-concurrent-allocs': 16, + 'generation-mode': 'none', + }, + 3: { + 'model': 'llama3.1-70b-instruct', + 'num-users': 70, + 'duration': 300, + 'gpu-mem-gb': 0, + 'cpu-mem-gb': 0, + 'max-concurrent-allocs': 4, + 'generation-mode': 'none', + }, +} + + class KVCacheBenchmark(Benchmark): """KV Cache benchmark for LLM inference storage. @@ -234,6 +268,8 @@ def _execute_run(self) -> int: ) wrapper_path = Path(self.kvcache_bin_path).parent / 'mlperf_wrapper.py' + # Wrapper-adjacent config.yaml is the default; CLOSED forbids overriding. + config_path = config or str(Path(self.kvcache_bin_path).parent / 'config.yaml') mpi_prefix = generate_mpi_prefix_cmd( mpi_cmd=getattr(self.args, 'mpi_bin', 'mpirun'), @@ -248,6 +284,7 @@ def _execute_run(self) -> int: option_results = {} for option in [1, 2, 3]: + option_kv_args = self._build_option_kvcache_args(option, is_closed) trial_dirs = [] for trial in range(trials): @@ -258,13 +295,12 @@ def _execute_run(self) -> int: wrapper_cmd = ( f"{mpi_prefix} {sys.executable} {wrapper_path}" - f" --option {option}" - f" --seed {seed}" - f" --base-output-dir {option_trial_dir}" - f" --cache-dir {cache_dir}" + f" --rank-output-base {option_trial_dir}" + f" --rank-cache-base {cache_dir}" + f" --seed-base {seed}" + f" --config {config_path}" + f" {' '.join(option_kv_args)}" ) - if config: - wrapper_cmd += f" --config {config}" self.logger.status(f"Running option {option} trial {trial + 1}/{trials}...") self._execute_command( @@ -292,6 +328,53 @@ def _execute_run(self) -> int: self.write_cluster_info() return 0 + def _build_option_kvcache_args(self, option: int, is_closed: bool) -> List[str]: + """Return the kv-cache.py CLI args for this option. + + CLOSED: emits WORKLOAD_PARAMS[option] verbatim — MLPerf-mandated, no + user input can reach kv-cache.py through this path because the CLOSED + CLI does not expose the corresponding flags. + + OPEN: user-set flags supersede WORKLOAD_PARAMS[option] one key at a + time. max-concurrent-allocs is not exposed by the OPEN CLI, so it + always comes from WORKLOAD_PARAMS. + """ + defaults = WORKLOAD_PARAMS[option] + if is_closed: + params = dict(defaults) + else: + params = { + 'model': getattr(self.args, 'model', None) or defaults['model'], + 'num-users': ( + getattr(self.args, 'num_users', None) + if getattr(self.args, 'num_users', None) is not None + else defaults['num-users'] + ), + 'duration': ( + getattr(self.args, 'duration', None) + if getattr(self.args, 'duration', None) is not None + else defaults['duration'] + ), + 'gpu-mem-gb': ( + getattr(self.args, 'gpu_mem_gb', None) + if getattr(self.args, 'gpu_mem_gb', None) is not None + else defaults['gpu-mem-gb'] + ), + 'cpu-mem-gb': ( + getattr(self.args, 'cpu_mem_gb', None) + if getattr(self.args, 'cpu_mem_gb', None) is not None + else defaults['cpu-mem-gb'] + ), + 'max-concurrent-allocs': defaults['max-concurrent-allocs'], + 'generation-mode': ( + getattr(self.args, 'generation_mode', None) or defaults['generation-mode'] + ), + } + out = [] + for key, value in params.items(): + out.extend([f'--{key}', str(value)]) + return out + def _execute_datasize(self) -> int: """Calculate memory requirements for KV cache. diff --git a/tests/unit/test_benchmarks_kvcache.py b/tests/unit/test_benchmarks_kvcache.py index 818cf404..c4158033 100755 --- a/tests/unit/test_benchmarks_kvcache.py +++ b/tests/unit/test_benchmarks_kvcache.py @@ -835,8 +835,13 @@ def fake_execute(cmd, **kwargs): assert '--npernode 2' in executed_cmds[0], \ f"Missing --npernode 2 in: {executed_cmds[0]}" - def test_wrapper_receives_option_seed_output_cache(self, bm, fake_agg_result): - """Wrapper command must include --option, --seed, --base-output-dir, --cache-dir (DIST-07).""" + def test_wrapper_receives_rank_bases_and_seed_base(self, bm, fake_agg_result): + """Wrapper command must include --rank-output-base, --rank-cache-base, --seed-base (DIST-07). + + The wrapper API was reshaped for #498/#500: the wrapper no longer takes + --option and no longer encodes WORKLOAD_PARAMS. Per-option kv-cache.py + args are emitted by mlpstorage_py.benchmarks.kvcache and pass through + the wrapper via parse_known_args.""" bm.args.trials = 1 bm.args.inter_option_delay = 0 bm.args.seed = 42 @@ -852,10 +857,11 @@ def fake_execute(cmd, **kwargs): patch.object(bm, 'write_metadata'): bm._execute_run() cmd0 = executed_cmds[0] - assert '--option 1' in cmd0, f"Missing --option 1 in: {cmd0}" - assert '--seed 42' in cmd0, f"Missing --seed 42 in: {cmd0}" - assert '--base-output-dir' in cmd0, f"Missing --base-output-dir in: {cmd0}" - assert '--cache-dir /tmp/kv' in cmd0, f"Missing --cache-dir in: {cmd0}" + assert '--seed-base 42' in cmd0, f"Missing --seed-base 42 in: {cmd0}" + assert '--rank-output-base' in cmd0, f"Missing --rank-output-base in: {cmd0}" + assert '--rank-cache-base /tmp/kv' in cmd0, f"Missing --rank-cache-base in: {cmd0}" + # The legacy --option flag is no longer part of the wrapper API. + assert '--option ' not in cmd0, f"Stale --option flag in: {cmd0}" def test_per_option_trial_dirs_created(self, bm, fake_agg_result, tmp_path): """option_{N}/trial_{T}/ directories must be created.""" @@ -1131,3 +1137,152 @@ def test_open_seed_override_allowed(self, tmp_path): patch.object(bm, 'write_metadata'): rc = bm._execute_run() assert rc == 0 + + +class TestWorkloadParamsConstant: + """WORKLOAD_PARAMS lives in mlpstorage_py.benchmarks.kvcache — the single + source of truth for per-option MLPerf v3.0 workloads. Previously it lived + in kv_cache_benchmark/mlperf_wrapper.py; centralizing it here is what + closes issues #498 and #500.""" + + def test_options_are_1_2_3(self): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + assert set(WORKLOAD_PARAMS.keys()) == {1, 2, 3} + + def test_option1_model_is_8b(self): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + assert WORKLOAD_PARAMS[1]['model'] == 'llama3.1-8b' + + def test_option3_model_is_70b(self): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + assert WORKLOAD_PARAMS[3]['model'] == 'llama3.1-70b-instruct' + + def test_generation_mode_always_none(self): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + for opt in (1, 2, 3): + assert WORKLOAD_PARAMS[opt]['generation-mode'] == 'none' + + +class TestBuildOptionKvcacheArgs: + """Per-option kv-cache.py CLI args returned by _build_option_kvcache_args. + + Verifies CLOSED uses the mandated WORKLOAD_PARAMS verbatim and OPEN lets + user-set CLI flags supersede the per-option defaults — the behavior the + old in-wrapper squashing prevented (issues #498 and #500).""" + + def test_closed_emits_workload_params_verbatim_for_option1(self, tmp_path): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + bm = _make_run_benchmark(tmp_path) + # Even if args carry non-default values, CLOSED ignores them. + bm.args.model = 'llama3.1-70b-instruct' + bm.args.num_users = 999 + out = bm._build_option_kvcache_args(1, is_closed=True) + assert '--model' in out and out[out.index('--model') + 1] == WORKLOAD_PARAMS[1]['model'] + assert '--num-users' in out and out[out.index('--num-users') + 1] == str(WORKLOAD_PARAMS[1]['num-users']) + + def test_closed_emits_option3_70b_model(self, tmp_path): + bm = _make_run_benchmark(tmp_path) + out = bm._build_option_kvcache_args(3, is_closed=True) + assert out[out.index('--model') + 1] == 'llama3.1-70b-instruct' + + def test_open_user_args_override_defaults(self, tmp_path): + """Issue #498: in OPEN, user CLI args must reach kv-cache.py.""" + bm = _make_run_benchmark(tmp_path) + bm.args.model = 'llama3.1-70b-instruct' + bm.args.num_users = 42 + bm.args.duration = 600 + bm.args.gpu_mem_gb = 8 + bm.args.cpu_mem_gb = 16 + bm.args.generation_mode = 'realistic' + out = bm._build_option_kvcache_args(1, is_closed=False) + assert out[out.index('--model') + 1] == 'llama3.1-70b-instruct' + assert out[out.index('--num-users') + 1] == '42' + assert out[out.index('--duration') + 1] == '600' + assert out[out.index('--gpu-mem-gb') + 1] == '8' + assert out[out.index('--cpu-mem-gb') + 1] == '16' + assert out[out.index('--generation-mode') + 1] == 'realistic' + + def test_open_falls_back_to_workload_params_when_args_missing(self, tmp_path): + """In OPEN, attributes the user did not set come from WORKLOAD_PARAMS.""" + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + bm = _make_run_benchmark(tmp_path) + for attr in ('model', 'num_users', 'duration', 'gpu_mem_gb', + 'cpu_mem_gb', 'generation_mode'): + if hasattr(bm.args, attr): + delattr(bm.args, attr) + out = bm._build_option_kvcache_args(2, is_closed=False) + assert out[out.index('--model') + 1] == WORKLOAD_PARAMS[2]['model'] + assert out[out.index('--num-users') + 1] == str(WORKLOAD_PARAMS[2]['num-users']) + assert out[out.index('--cpu-mem-gb') + 1] == str(WORKLOAD_PARAMS[2]['cpu-mem-gb']) + + def test_max_concurrent_allocs_always_from_workload_params(self, tmp_path): + """max-concurrent-allocs is not user-exposed even in OPEN — it + always tracks the per-option WORKLOAD_PARAMS value.""" + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + bm = _make_run_benchmark(tmp_path) + out_open = bm._build_option_kvcache_args(3, is_closed=False) + out_closed = bm._build_option_kvcache_args(3, is_closed=True) + expected = str(WORKLOAD_PARAMS[3]['max-concurrent-allocs']) + assert out_open[out_open.index('--max-concurrent-allocs') + 1] == expected + assert out_closed[out_closed.index('--max-concurrent-allocs') + 1] == expected + + +class TestWrapperCommandForwardsPerOptionArgs: + """End-to-end check that the wrapper invocation built by _execute_run + contains the per-option model/num-users/etc., so kv-cache.py receives + them downstream of mlperf_wrapper.py.""" + + def _capture_first_cmd(self, bm, fake_agg_result): + executed = [] + def fake_execute(cmd, **kwargs): + executed.append(cmd) + return ('', '', 0) + with patch.object(bm, '_execute_command', side_effect=fake_execute), \ + patch.object(bm, '_interruptible_sleep'), \ + patch.object(bm, '_aggregate_option_results', return_value=fake_agg_result), \ + patch.object(bm, '_write_run_summary'), \ + patch.object(bm, 'write_metadata'): + bm._execute_run() + return executed[0] + + def _fake_agg(self): + return { + 'option': 1, 'aggregated_read_bandwidth_gbps': 0.0, + 'aggregated_write_bandwidth_gbps': 0.0, + 'aggregated_avg_throughput_tokens_per_sec': 0.0, + 'aggregated_storage_throughput_tokens_per_sec': 0.0, + 'aggregated_p95_latency_ms': 0.0, + 'rank_count': 1, 'trial_count': 1, + 'partial_failure': False, 'missing_files': [], 'cpu_tier_ranks': [], + } + + def test_closed_option1_wrapper_cmd_contains_8b_model(self, tmp_path): + bm = _make_run_benchmark(tmp_path) + bm.args.mode = 'closed' + # CLOSED requires trials=3 and inter_option_delay=20 (the fixture's + # values); overriding them would trigger CLOSED enforcement and + # short-circuit before any command is built. + cmd0 = self._capture_first_cmd(bm, self._fake_agg()) + assert '--model llama3.1-8b' in cmd0 + assert '--num-users 200' in cmd0 + assert '--max-concurrent-allocs 16' in cmd0 + + def test_open_user_model_appears_in_wrapper_cmd(self, tmp_path): + """Issue #498: user --model must reach the wrapper-built command.""" + bm = _make_run_benchmark(tmp_path) + bm.args.mode = 'open' + bm.args.model = 'llama3.1-70b-instruct' + bm.args.num_users = 33 + bm.args.trials = 1 + bm.args.inter_option_delay = 0 + cmd0 = self._capture_first_cmd(bm, self._fake_agg()) + assert '--model llama3.1-70b-instruct' in cmd0 + assert '--num-users 33' in cmd0 + + def test_wrapper_cmd_contains_config_path(self, tmp_path): + """mlpstorage now owns the config path; it must be passed to the wrapper.""" + bm = _make_run_benchmark(tmp_path) + bm.args.trials = 1 + bm.args.inter_option_delay = 0 + cmd0 = self._capture_first_cmd(bm, self._fake_agg()) + assert '--config' in cmd0 diff --git a/tests/unit/test_mlperf_wrapper.py b/tests/unit/test_mlperf_wrapper.py index 30fa622d..228e3d48 100755 --- a/tests/unit/test_mlperf_wrapper.py +++ b/tests/unit/test_mlperf_wrapper.py @@ -2,8 +2,9 @@ Tests cover: - get_rank(): env var reading, OMPI precedence over PMI, invalid value fallback -- BASE_SEED and WORKLOAD_PARAMS constants -- main(): rank dir creation, effective seed, output flag, option params, config path +- BASE_SEED constant +- main(): rank dir creation, effective seed, output flag, forwarded args + pass-through, per-rank overrides come last (argparse-style override) """ import importlib.util @@ -60,32 +61,17 @@ class TestBaseConstants: def test_base_seed_is_42(self, wrapper_module): assert wrapper_module.BASE_SEED == 42 - -class TestOptionParams: - """Tests for WORKLOAD_PARAMS fixed MLPerf values.""" - - def test_option_keys_are_1_2_3(self, wrapper_module): - assert set(wrapper_module.WORKLOAD_PARAMS.keys()) == {1, 2, 3} - - def test_option1_model_is_8b(self, wrapper_module): - assert wrapper_module.WORKLOAD_PARAMS[1]['model'] == 'llama3.1-8b' - - def test_option3_model_is_70b(self, wrapper_module): - assert wrapper_module.WORKLOAD_PARAMS[3]['model'] == 'llama3.1-70b-instruct' - - def test_generation_mode_always_none(self, wrapper_module): - for opt in [1, 2, 3]: - assert wrapper_module.WORKLOAD_PARAMS[opt]['generation-mode'] == 'none' - - def test_option2_cpu_mem_gb_is_4(self, wrapper_module): - assert wrapper_module.WORKLOAD_PARAMS[2]['cpu-mem-gb'] == 4 + def test_wrapper_does_not_define_workload_params(self, wrapper_module): + """WORKLOAD_PARAMS lives in mlpstorage_py.benchmarks.kvcache now; + the wrapper must remain a dumb rank-aware launcher.""" + assert not hasattr(wrapper_module, 'WORKLOAD_PARAMS') class TestMain: """Tests for main() — invokes subprocess and exits.""" def _run_main(self, wrapper_module, monkeypatch, tmp_path, extra_argv=None, - ompi_rank=None): + ompi_rank=None, start_delay=0, end_delay=0): """Helper: sets up env, patches argv+subprocess, calls main(), captures cmd.""" monkeypatch.delenv('OMPI_COMM_WORLD_RANK', raising=False) monkeypatch.delenv('PMI_RANK', raising=False) @@ -96,10 +82,11 @@ def _run_main(self, wrapper_module, monkeypatch, tmp_path, extra_argv=None, cache_dir = tmp_path / 'cache' argv = [ 'mlperf_wrapper.py', - '--option', '1', - '--seed', '42', - '--base-output-dir', str(output_dir), - '--cache-dir', str(cache_dir), + '--rank-output-base', str(output_dir), + '--rank-cache-base', str(cache_dir), + '--seed-base', '42', + '--start-delay', str(start_delay), + '--end-delay', str(end_delay), ] if extra_argv: argv.extend(extra_argv) @@ -130,7 +117,7 @@ def test_creates_rank_cache_dir(self, wrapper_module, monkeypatch, tmp_path): def test_effective_seed_is_base_plus_rank(self, wrapper_module, monkeypatch, tmp_path): cmd, _, _ = self._run_main( wrapper_module, monkeypatch, tmp_path, - extra_argv=['--seed', '10'], + extra_argv=['--seed-base', '10'], ompi_rank=2, ) seed_idx = cmd.index('--seed') @@ -143,19 +130,44 @@ def test_output_flag_points_to_rank_dir(self, wrapper_module, monkeypatch, tmp_p assert 'rank_0' in output_val assert 'kvcache_results_' in output_val - def test_workload_params_injected_for_option1(self, wrapper_module, monkeypatch, tmp_path): - cmd, _, _ = self._run_main(wrapper_module, monkeypatch, tmp_path) - assert '--model' in cmd - model_idx = cmd.index('--model') - assert cmd[model_idx + 1] == 'llama3.1-8b' + def test_cache_dir_points_to_rank_cache_dir(self, wrapper_module, monkeypatch, tmp_path): + cmd, _, cache_dir = self._run_main(wrapper_module, monkeypatch, tmp_path) + cache_idx = cmd.index('--cache-dir') + cache_val = cmd[cache_idx + 1] + assert str(cache_dir / 'rank_0') == cache_val - def test_config_default_is_adjacent_config_yaml(self, wrapper_module, monkeypatch, tmp_path): - cmd, _, _ = self._run_main(wrapper_module, monkeypatch, tmp_path) - assert any('config.yaml' in arg for arg in cmd) - - def test_config_override_accepted(self, wrapper_module, monkeypatch, tmp_path): + def test_forwarded_args_passed_through_verbatim(self, wrapper_module, monkeypatch, tmp_path): + """Wrapper forwards unrecognized args to kv-cache.py without modification.""" cmd, _, _ = self._run_main( wrapper_module, monkeypatch, tmp_path, - extra_argv=['--config', '/custom/config.yaml'], + extra_argv=['--model', 'llama3.1-8b', '--num-users', '200', + '--config', '/tmp/cfg.yaml'], ) - assert '/custom/config.yaml' in cmd + assert '--model' in cmd + assert cmd[cmd.index('--model') + 1] == 'llama3.1-8b' + assert '--num-users' in cmd + assert cmd[cmd.index('--num-users') + 1] == '200' + assert '--config' in cmd + assert cmd[cmd.index('--config') + 1] == '/tmp/cfg.yaml' + + def test_per_rank_overrides_come_last(self, wrapper_module, monkeypatch, tmp_path): + """Per-rank --seed/--output/--cache-dir are appended after forwarded + args so argparse store-action picks them over any duplicates.""" + cmd, _, _ = self._run_main( + wrapper_module, monkeypatch, tmp_path, + extra_argv=['--seed', '999', '--cache-dir', '/forwarded/cache'], + ) + # Last --seed wins; wrapper's effective seed (42 + rank 0 = 42) + # must appear AFTER the forwarded --seed. + seed_positions = [i for i, x in enumerate(cmd) if x == '--seed'] + assert len(seed_positions) == 2 + assert cmd[seed_positions[-1] + 1] == '42' + + cache_positions = [i for i, x in enumerate(cmd) if x == '--cache-dir'] + assert len(cache_positions) == 2 + assert '/forwarded/cache' not in cmd[cache_positions[-1] + 1] + + def test_no_config_default_injected(self, wrapper_module, monkeypatch, tmp_path): + """Wrapper no longer falls back to an adjacent config.yaml — caller owns the path.""" + cmd, _, _ = self._run_main(wrapper_module, monkeypatch, tmp_path) + assert '--config' not in cmd