diff --git a/kv_cache_benchmark/mlperf_wrapper.py b/kv_cache_benchmark/mlperf_wrapper.py index dc3a5296..896616ae 100755 --- a/kv_cache_benchmark/mlperf_wrapper.py +++ b/kv_cache_benchmark/mlperf_wrapper.py @@ -1,10 +1,12 @@ #!/usr/bin/env python3 -"""MPI-rank-aware wrapper for MLPerf KV Cache benchmark. +"""MPI-rank-aware launcher for the MLPerf KV Cache benchmark. Invoked by mpirun per-rank; reads OMPI_COMM_WORLD_RANK (OpenMPI) or PMI_RANK -(MPICH) to determine this rank's index. Computes per-rank seed, output -directory, and cache directory, then invokes kv-cache.py with the fixed MLPerf -v3.0 parameters for the specified option. +(MPICH) to determine this rank's index. The wrapper does NOT encode any +workload parameters — it only computes per-rank values (seed, output file, +cache directory) and forwards everything else to kv-cache.py. The caller +(mlpstorage_py.benchmarks.kvcache.KVCacheBenchmark) owns the per-option +workload parameter set and CLOSED/OPEN enforcement. """ import argparse @@ -18,40 +20,6 @@ BASE_SEED = 42 TEST_DELAY = 90 -# MLPerf v3.0 fixed parameters per option. -# All numeric values stored as int/float; converted to str when building cmd. -# 'generation-mode' is ALWAYS 'none' for MLPerf compliance — do NOT rely on -# kv-cache.py defaults; the default may change in future versions. -WORKLOAD_PARAMS = { - 1: { - 'model': 'llama3.1-8b', - 'num-users': 200, - 'duration': 300, - 'gpu-mem-gb': 0, - 'cpu-mem-gb': 0, - 'max-concurrent-allocs': 16, - 'generation-mode': 'none', - }, - 2: { - 'model': 'llama3.1-8b', - 'num-users': 100, - 'duration': 300, - 'gpu-mem-gb': 0, - 'cpu-mem-gb': 4, - 'max-concurrent-allocs': 16, - 'generation-mode': 'none', - }, - 3: { - 'model': 'llama3.1-70b-instruct', - 'num-users': 70, - 'duration': 300, - 'gpu-mem-gb': 0, - 'cpu-mem-gb': 0, - 'max-concurrent-allocs': 4, - 'generation-mode': 'none', - }, -} - def get_rank() -> int: """Read global MPI rank from environment (no mpi4py). @@ -79,50 +47,57 @@ def get_rank() -> int: def main(): + # allow_abbrev=False so forwarded kv-cache.py flags like --seed are not + # silently swallowed by a prefix match against --seed-base. parser = argparse.ArgumentParser( - description="MLPerf KV Cache MPI-rank-aware wrapper" + description="MLPerf KV Cache MPI-rank-aware launcher", + allow_abbrev=False, ) parser.add_argument( - '--option', - type=int, - choices=[1, 2, 3], + '--rank-output-base', + type=str, required=True, - help="MLPerf v3.0 option (1=Max Storage Stress, 2=Storage Throughput, 3=Large Model 70B).", + dest='rank_output_base', + help="Base output directory. Per-rank results written to /rank_/.", ) parser.add_argument( - '--seed', + '--rank-cache-base', + type=str, + required=True, + dest='rank_cache_base', + help="Base cache directory. Per-rank cache written to /rank_/.", + ) + parser.add_argument( + '--seed-base', type=int, default=BASE_SEED, + dest='seed_base', help=f"Base random seed (default: {BASE_SEED}). Effective seed = base + rank.", ) parser.add_argument( - '--base-output-dir', - type=str, - required=True, - dest='base_output_dir', - help="Base output directory. Per-rank results written to /rank_/.", - ) - parser.add_argument( - '--cache-dir', - type=str, - required=True, - dest='cache_dir', - help="Base cache directory. Per-rank cache written to /rank_/.", + '--start-delay', + type=int, + default=TEST_DELAY, + dest='start_delay', + help=f"Seconds to sleep before invoking kv-cache.py (default: {TEST_DELAY}).", ) parser.add_argument( - '--config', - type=str, - default=None, - help="Path to YAML config file. Defaults to config.yaml adjacent to this script (D-02).", + '--end-delay', + type=int, + default=TEST_DELAY, + dest='end_delay', + help=f"Seconds to sleep after kv-cache.py exits (default: {TEST_DELAY}).", ) - args = parser.parse_args() + # Wrapper-specific flags are consumed; everything else is forwarded to + # kv-cache.py verbatim (including --config, --model, --num-users, etc.). + args, forwarded = parser.parse_known_args() rank = get_rank() - effective_seed = args.seed + rank + effective_seed = args.seed_base + rank - rank_output_dir = Path(args.base_output_dir) / f"rank_{rank}" - rank_cache_dir = Path(args.cache_dir) / f"rank_{rank}" + rank_output_dir = Path(args.rank_output_base) / f"rank_{rank}" + rank_cache_dir = Path(args.rank_cache_base) / f"rank_{rank}" rank_output_dir.mkdir(parents=True, exist_ok=True) rank_cache_dir.mkdir(parents=True, exist_ok=True) @@ -130,38 +105,26 @@ def main(): ts = datetime.now().strftime('%Y%m%d_%H%M%S') output_file = rank_output_dir / f"kvcache_results_{ts}.json" - # D-01: kv-cache.py located relative to this script; both share kv_cache_benchmark/ kvcache_script = Path(__file__).parent / 'kv-cache.py' - # D-02: config.yaml located relative to this script; user may override via --config - config_path = args.config or str(Path(__file__).parent / 'config.yaml') - - params = WORKLOAD_PARAMS[args.option] + # Per-rank seed/output/cache are appended last so argparse store-action + # uses them over any duplicates that came in via --forwarded args. cmd = [ sys.executable, str(kvcache_script), - '--config', config_path, + *forwarded, '--seed', str(effective_seed), '--output', str(output_file), '--cache-dir', str(rank_cache_dir), - '--model', params['model'], - '--num-users', str(params['num-users']), - '--duration', str(params['duration']), - '--gpu-mem-gb', str(params['gpu-mem-gb']), - '--cpu-mem-gb', str(params['cpu-mem-gb']), - '--max-concurrent-allocs', str(params['max-concurrent-allocs']), - # '--generation-mode' is passed EXPLICITLY — do NOT omit even though the - # value is always 'none'. kv-cache.py defaults may change in future versions. - '--generation-mode', params['generation-mode'], ] - print(f"KV Cache Wrapper - Start delay for {TEST_DELAY} seconds") - time.sleep(TEST_DELAY) + print(f"KV Cache Wrapper - Start delay for {args.start_delay} seconds") + time.sleep(args.start_delay) print(f"KV Cache Wrapper - Starting benchmark pass...") result = subprocess.run(cmd) - print(f"KV Cache Wrapper - End delay for {TEST_DELAY} seconds") - time.sleep(TEST_DELAY) + print(f"KV Cache Wrapper - End delay for {args.end_delay} seconds") + time.sleep(args.end_delay) print(f"KV Cache Wrapper - Finished benchmark pass") sys.exit(result.returncode) diff --git a/mlpstorage_py/__init__.py b/mlpstorage_py/__init__.py index f2cf0746..c772a6b4 100755 --- a/mlpstorage_py/__init__.py +++ b/mlpstorage_py/__init__.py @@ -2,11 +2,15 @@ import pathlib import tomllib # stdlib since Python 3.11; project requires >=3.12 +# Distribution name. Must match `project.name` in pyproject.toml — tested in +# tests/unit/test_version.py::test_dist_name_matches_pyproject. +_DIST_NAME = "mlpstorage" + def _resolve_version() -> str: - # Primary: installed distribution metadata (correct dist name is "mlpstorage") + # Primary: installed distribution metadata try: - return _pkg_version("mlpstorage") + return _pkg_version(_DIST_NAME) except _PkgNF: pass # Fallback: parse pyproject.toml for source-checkout usage diff --git a/mlpstorage_py/benchmarks/dlio.py b/mlpstorage_py/benchmarks/dlio.py index e7b0d2ab..fb722a44 100755 --- a/mlpstorage_py/benchmarks/dlio.py +++ b/mlpstorage_py/benchmarks/dlio.py @@ -355,6 +355,11 @@ def generate_dlio_command(self): self.args.oversubscribe, self.args.allow_run_as_root, self.args.mpi_params, self.logger, mpi_btl=getattr(self.args, 'mpi_btl', 'auto')) + # Forward DLIO_DROP_CACHES_TIMEOUT to ranks so multi-host runs honor + # the operator's CLI choice (mlcommons/storage #487). OpenMPI does + # not forward arbitrary env vars by default; -x VAR opts VAR in. + if 'DLIO_DROP_CACHES_TIMEOUT' in os.environ: + mpi_prefix += " -x DLIO_DROP_CACHES_TIMEOUT" cmd = f"{mpi_prefix} {cmd}" return cmd @@ -370,6 +375,13 @@ class TrainingBenchmark(DLIOBenchmark): def __init__(self, args, **kwargs): super().__init__(args, **kwargs) + # Plumb --drop-caches-timeout-seconds into DLIO via env var + # (mlcommons/storage #487). Only the `run` subcommand registers the + # flag, so this is a no-op for datasize/datagen/configview. + timeout = getattr(args, 'drop_caches_timeout_seconds', None) + if timeout is not None: + os.environ['DLIO_DROP_CACHES_TIMEOUT'] = str(timeout) + # This allows each command to map to a specific wrapper method. When methods are created, replace the default # 'self.execute_command' with the command-specific method (like "self._datasize()") self.command_method_map = dict( diff --git a/mlpstorage_py/benchmarks/kvcache.py b/mlpstorage_py/benchmarks/kvcache.py index 42dd423f..06c26919 100755 --- a/mlpstorage_py/benchmarks/kvcache.py +++ b/mlpstorage_py/benchmarks/kvcache.py @@ -33,6 +33,40 @@ from mlpstorage_py.utils import generate_mpi_prefix_cmd, MLPSJsonEncoder +# MLPerf v3.0 fixed per-option workload parameters. Single source of truth for +# what option N means at the kv-cache.py level. In CLOSED these are mandated; +# in OPEN they act as per-option defaults that user CLI flags supersede. +WORKLOAD_PARAMS = { + 1: { + 'model': 'llama3.1-8b', + 'num-users': 200, + 'duration': 300, + 'gpu-mem-gb': 0, + 'cpu-mem-gb': 0, + 'max-concurrent-allocs': 16, + 'generation-mode': 'none', + }, + 2: { + 'model': 'llama3.1-8b', + 'num-users': 100, + 'duration': 300, + 'gpu-mem-gb': 0, + 'cpu-mem-gb': 4, + 'max-concurrent-allocs': 16, + 'generation-mode': 'none', + }, + 3: { + 'model': 'llama3.1-70b-instruct', + 'num-users': 70, + 'duration': 300, + 'gpu-mem-gb': 0, + 'cpu-mem-gb': 0, + 'max-concurrent-allocs': 4, + 'generation-mode': 'none', + }, +} + + class KVCacheBenchmark(Benchmark): """KV Cache benchmark for LLM inference storage. @@ -234,6 +268,8 @@ def _execute_run(self) -> int: ) wrapper_path = Path(self.kvcache_bin_path).parent / 'mlperf_wrapper.py' + # Wrapper-adjacent config.yaml is the default; CLOSED forbids overriding. + config_path = config or str(Path(self.kvcache_bin_path).parent / 'config.yaml') mpi_prefix = generate_mpi_prefix_cmd( mpi_cmd=getattr(self.args, 'mpi_bin', 'mpirun'), @@ -248,6 +284,7 @@ def _execute_run(self) -> int: option_results = {} for option in [1, 2, 3]: + option_kv_args = self._build_option_kvcache_args(option, is_closed) trial_dirs = [] for trial in range(trials): @@ -258,13 +295,12 @@ def _execute_run(self) -> int: wrapper_cmd = ( f"{mpi_prefix} {sys.executable} {wrapper_path}" - f" --option {option}" - f" --seed {seed}" - f" --base-output-dir {option_trial_dir}" - f" --cache-dir {cache_dir}" + f" --rank-output-base {option_trial_dir}" + f" --rank-cache-base {cache_dir}" + f" --seed-base {seed}" + f" --config {config_path}" + f" {' '.join(option_kv_args)}" ) - if config: - wrapper_cmd += f" --config {config}" self.logger.status(f"Running option {option} trial {trial + 1}/{trials}...") self._execute_command( @@ -292,6 +328,53 @@ def _execute_run(self) -> int: self.write_cluster_info() return 0 + def _build_option_kvcache_args(self, option: int, is_closed: bool) -> List[str]: + """Return the kv-cache.py CLI args for this option. + + CLOSED: emits WORKLOAD_PARAMS[option] verbatim — MLPerf-mandated, no + user input can reach kv-cache.py through this path because the CLOSED + CLI does not expose the corresponding flags. + + OPEN: user-set flags supersede WORKLOAD_PARAMS[option] one key at a + time. max-concurrent-allocs is not exposed by the OPEN CLI, so it + always comes from WORKLOAD_PARAMS. + """ + defaults = WORKLOAD_PARAMS[option] + if is_closed: + params = dict(defaults) + else: + params = { + 'model': getattr(self.args, 'model', None) or defaults['model'], + 'num-users': ( + getattr(self.args, 'num_users', None) + if getattr(self.args, 'num_users', None) is not None + else defaults['num-users'] + ), + 'duration': ( + getattr(self.args, 'duration', None) + if getattr(self.args, 'duration', None) is not None + else defaults['duration'] + ), + 'gpu-mem-gb': ( + getattr(self.args, 'gpu_mem_gb', None) + if getattr(self.args, 'gpu_mem_gb', None) is not None + else defaults['gpu-mem-gb'] + ), + 'cpu-mem-gb': ( + getattr(self.args, 'cpu_mem_gb', None) + if getattr(self.args, 'cpu_mem_gb', None) is not None + else defaults['cpu-mem-gb'] + ), + 'max-concurrent-allocs': defaults['max-concurrent-allocs'], + 'generation-mode': ( + getattr(self.args, 'generation_mode', None) or defaults['generation-mode'] + ), + } + out = [] + for key, value in params.items(): + out.extend([f'--{key}', str(value)]) + return out + def _execute_datasize(self) -> int: """Calculate memory requirements for KV cache. diff --git a/mlpstorage_py/cli/training_args.py b/mlpstorage_py/cli/training_args.py index dccf609e..768e6b21 100755 --- a/mlpstorage_py/cli/training_args.py +++ b/mlpstorage_py/cli/training_args.py @@ -5,6 +5,7 @@ including datasize, datagen, run, and configview commands. """ +import argparse import sys from mlpstorage_py.config import ( @@ -23,6 +24,22 @@ ) +def _positive_int(raw: str) -> int: + """argparse type: accept positive ints; reject 0, negative, and non-numeric. + + Used by --drop-caches-timeout-seconds. DLIO clamps to a minimum of 1 + on its end too, but rejecting at the CLI boundary gives a clearer error + message than `subprocess.run(timeout=0)` would later. + """ + try: + value = int(raw) + except (TypeError, ValueError): + raise argparse.ArgumentTypeError(f"expected positive integer, got {raw!r}") + if value < 1: + raise argparse.ArgumentTypeError(f"expected positive integer (>= 1), got {value}") + return value + + def add_training_arguments(parser, mode): """Add training benchmark arguments to the parser. @@ -162,6 +179,26 @@ def _add_training_core_args(parser, command, accel_choices): add_dlio_arguments(parser) + # Training `run` only: per-call timeout for DLIO's per-epoch page-cache + # flush. Deployment knob (not a submission tunable), so it's exposed in + # every mode like --dlio-bin-path. Plumbed through to DLIO via the + # DLIO_DROP_CACHES_TIMEOUT env var. See mlcommons/storage #487. + if command == "run": + parser.add_argument( + '--drop-caches-timeout-seconds', + type=_positive_int, + default=None, + metavar='SECONDS', + help=( + "Per-call timeout for the per-epoch page-cache flush " + "(`sudo -n sh -c 'echo 3 > /proc/sys/vm/drop_caches'`). " + "Default is DLIO's built-in 30s. Raise this on large-RAM " + "hosts where the kernel needs longer to drop caches " + "(e.g. 300). Plumbed through to DLIO via the " + "DLIO_DROP_CACHES_TIMEOUT env var." + ), + ) + # --params is allowed in CLOSED mode for the parameters listed in # TrainingRunRulesChecker.CLOSED_ALLOWED_PARAMS (e.g. dataset.num_files_train, # dataset.num_subfolders_train). Register it in the core args so closed diff --git a/pyproject.toml b/pyproject.toml index d5bffc1c..e3ed54fc 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mlpstorage" -version = "3.0.16" +version = "3.0.17" description = "MLPerf Storage Benchmark Suite" readme = "README.md" license = {text = "Apache-2.0"} @@ -126,15 +126,19 @@ environments = ["sys_platform == 'linux'"] torch = [{ index = "pytorch-cpu" }] torchvision = [{ index = "pytorch-cpu" }] torchaudio = [{ index = "pytorch-cpu" }] -# Pinned to upstream main @ PR #27 merge — carries: -# - DLIO PR #21 fix for storage #391 part 1 (TorchIterableDatasetSimple gating) -# - DLIO PR #22 fix for storage #415 (mpi4py auto-init in spawn workers) -# - DLIO PR #23 fix for storage #391 part 2 (sudo -n + warn-once for drop_caches) -# - DLIO PR #24 fix for storage #448 (per-node worker-memory budget) -# - DLIO PR #25 fix for storage #455 (sampler floor division — equal per-rank shards) +# Tracks upstream main. Reproducibility is anchored by the resolved hash +# recorded in uv.lock; bump by re-running `uv lock --upgrade-package dlio-benchmark`. +# Current main carries (most recent first): +# - DLIO PR #28 fix for storage #487: expose DLIO_DROP_CACHES_TIMEOUT, +# distinguish slow flush from sudo refusal # - DLIO PR #27 fix for storage #466 #472: skip_listing, S3 object storage, # epoch-2+ AU, TFRecord via s3dlio, large-scale listing OOM prevention -dlio-benchmark = { git = "https://github.com/mlcommons/DLIO_local_changes.git", rev = "7098cccdb1fe35ab6487705f33f229e3c125dd44" } +# - DLIO PR #25 fix for storage #455 (sampler floor division — equal per-rank shards) +# - DLIO PR #24 fix for storage #448 (per-node worker-memory budget) +# - DLIO PR #23 fix for storage #391 part 2 (sudo -n + warn-once for drop_caches) +# - DLIO PR #22 fix for storage #415 (mpi4py auto-init in spawn workers) +# - DLIO PR #21 fix for storage #391 part 1 (TorchIterableDatasetSimple gating) +dlio-benchmark = { git = "https://github.com/mlcommons/DLIO_local_changes.git", branch = "main" } [dependency-groups] dev = [ diff --git a/tests/integration/test_benchmark_flow.py b/tests/integration/test_benchmark_flow.py index 00c6b61c..1b99e435 100755 --- a/tests/integration/test_benchmark_flow.py +++ b/tests/integration/test_benchmark_flow.py @@ -623,7 +623,7 @@ def test_run_all_options_mock_mpi_writes_summary(self, tmp_path): bm = self._make_bm(tmp_path, what_if=False) def fake_execute(cmd, **kwargs): - m = re.search(r'--base-output-dir\s+(\S+)', cmd) + m = re.search(r'--rank-output-base\s+(\S+)', cmd) if m: trial_dir = m.group(1) rank_dir = Path(trial_dir) / 'rank_0' diff --git a/tests/unit/test_benchmarks_kvcache.py b/tests/unit/test_benchmarks_kvcache.py index 818cf404..0a4afee5 100755 --- a/tests/unit/test_benchmarks_kvcache.py +++ b/tests/unit/test_benchmarks_kvcache.py @@ -727,7 +727,7 @@ class TestExecuteRun: - _execute_command called 3 times per run (once per option) with trials=1 (DIST-02, DIST-04) - mpirun command contains '--mca orte_abort_on_non_zero_status 0' (DIST-08) - mpirun command contains '--npernode N' (DIST-03) - - wrapper receives --option, --seed, --base-output-dir, --cache-dir (DIST-07) + - wrapper receives --seed-base, --rank-output-base, --rank-cache-base (DIST-07) - per-option/trial dirs created with correct naming (option_{N}/trial_{T}/) - _interruptible_sleep called 2 times (after options 1 and 2; not after 3) (DIST-05) - _aggregate_option_results called 3 times when what_if=False @@ -835,8 +835,13 @@ def fake_execute(cmd, **kwargs): assert '--npernode 2' in executed_cmds[0], \ f"Missing --npernode 2 in: {executed_cmds[0]}" - def test_wrapper_receives_option_seed_output_cache(self, bm, fake_agg_result): - """Wrapper command must include --option, --seed, --base-output-dir, --cache-dir (DIST-07).""" + def test_wrapper_receives_rank_bases_and_seed_base(self, bm, fake_agg_result): + """Wrapper command must include --rank-output-base, --rank-cache-base, --seed-base (DIST-07). + + The wrapper API was reshaped for #498/#500: the wrapper no longer takes + --option and no longer encodes WORKLOAD_PARAMS. Per-option kv-cache.py + args are emitted by mlpstorage_py.benchmarks.kvcache and pass through + the wrapper via parse_known_args.""" bm.args.trials = 1 bm.args.inter_option_delay = 0 bm.args.seed = 42 @@ -852,10 +857,11 @@ def fake_execute(cmd, **kwargs): patch.object(bm, 'write_metadata'): bm._execute_run() cmd0 = executed_cmds[0] - assert '--option 1' in cmd0, f"Missing --option 1 in: {cmd0}" - assert '--seed 42' in cmd0, f"Missing --seed 42 in: {cmd0}" - assert '--base-output-dir' in cmd0, f"Missing --base-output-dir in: {cmd0}" - assert '--cache-dir /tmp/kv' in cmd0, f"Missing --cache-dir in: {cmd0}" + assert '--seed-base 42' in cmd0, f"Missing --seed-base 42 in: {cmd0}" + assert '--rank-output-base' in cmd0, f"Missing --rank-output-base in: {cmd0}" + assert '--rank-cache-base /tmp/kv' in cmd0, f"Missing --rank-cache-base in: {cmd0}" + # The legacy --option flag is no longer part of the wrapper API. + assert '--option ' not in cmd0, f"Stale --option flag in: {cmd0}" def test_per_option_trial_dirs_created(self, bm, fake_agg_result, tmp_path): """option_{N}/trial_{T}/ directories must be created.""" @@ -873,7 +879,7 @@ def test_per_option_trial_dirs_created(self, bm, fake_agg_result, tmp_path): assert option1_trial0.exists(), f"Expected {option1_trial0} to exist" def test_option_trial_dirs_in_command_path(self, bm, fake_agg_result): - """Command must reference option_N/trial_T subdirectory in --base-output-dir.""" + """Command must reference option_N/trial_T subdirectory in --rank-output-base.""" bm.args.trials = 1 bm.args.inter_option_delay = 0 executed_cmds = [] @@ -1131,3 +1137,207 @@ def test_open_seed_override_allowed(self, tmp_path): patch.object(bm, 'write_metadata'): rc = bm._execute_run() assert rc == 0 + + +class TestWorkloadParamsConstant: + """WORKLOAD_PARAMS lives in mlpstorage_py.benchmarks.kvcache — the single + source of truth for per-option MLPerf v3.0 workloads. Previously it lived + in kv_cache_benchmark/mlperf_wrapper.py; centralizing it here is what + closes issues #498 and #500.""" + + def test_options_are_1_2_3(self): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + assert set(WORKLOAD_PARAMS.keys()) == {1, 2, 3} + + def test_option1_model_is_8b(self): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + assert WORKLOAD_PARAMS[1]['model'] == 'llama3.1-8b' + + def test_option3_model_is_70b(self): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + assert WORKLOAD_PARAMS[3]['model'] == 'llama3.1-70b-instruct' + + def test_generation_mode_always_none(self): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + for opt in (1, 2, 3): + assert WORKLOAD_PARAMS[opt]['generation-mode'] == 'none' + + +class TestBuildOptionKvcacheArgs: + """Per-option kv-cache.py CLI args returned by _build_option_kvcache_args. + + Verifies CLOSED uses the mandated WORKLOAD_PARAMS verbatim and OPEN lets + user-set CLI flags supersede the per-option defaults — the behavior the + old in-wrapper squashing prevented (issues #498 and #500).""" + + def test_closed_emits_workload_params_verbatim_for_option1(self, tmp_path): + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + bm = _make_run_benchmark(tmp_path) + # Even if args carry non-default values, CLOSED ignores them. + bm.args.model = 'llama3.1-70b-instruct' + bm.args.num_users = 999 + out = bm._build_option_kvcache_args(1, is_closed=True) + assert '--model' in out and out[out.index('--model') + 1] == WORKLOAD_PARAMS[1]['model'] + assert '--num-users' in out and out[out.index('--num-users') + 1] == str(WORKLOAD_PARAMS[1]['num-users']) + + def test_closed_emits_option3_70b_model(self, tmp_path): + bm = _make_run_benchmark(tmp_path) + out = bm._build_option_kvcache_args(3, is_closed=True) + assert out[out.index('--model') + 1] == 'llama3.1-70b-instruct' + + def test_open_user_args_override_defaults(self, tmp_path): + """Issue #498: in OPEN, user CLI args must reach kv-cache.py.""" + bm = _make_run_benchmark(tmp_path) + bm.args.model = 'llama3.1-70b-instruct' + bm.args.num_users = 42 + bm.args.duration = 600 + bm.args.gpu_mem_gb = 8 + bm.args.cpu_mem_gb = 16 + bm.args.generation_mode = 'realistic' + out = bm._build_option_kvcache_args(1, is_closed=False) + assert out[out.index('--model') + 1] == 'llama3.1-70b-instruct' + assert out[out.index('--num-users') + 1] == '42' + assert out[out.index('--duration') + 1] == '600' + assert out[out.index('--gpu-mem-gb') + 1] == '8' + assert out[out.index('--cpu-mem-gb') + 1] == '16' + assert out[out.index('--generation-mode') + 1] == 'realistic' + + def test_open_falls_back_to_workload_params_when_args_missing(self, tmp_path): + """In OPEN, attributes the user did not set come from WORKLOAD_PARAMS.""" + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + bm = _make_run_benchmark(tmp_path) + for attr in ('model', 'num_users', 'duration', 'gpu_mem_gb', + 'cpu_mem_gb', 'generation_mode'): + if hasattr(bm.args, attr): + delattr(bm.args, attr) + out = bm._build_option_kvcache_args(2, is_closed=False) + assert out[out.index('--model') + 1] == WORKLOAD_PARAMS[2]['model'] + assert out[out.index('--num-users') + 1] == str(WORKLOAD_PARAMS[2]['num-users']) + assert out[out.index('--cpu-mem-gb') + 1] == str(WORKLOAD_PARAMS[2]['cpu-mem-gb']) + + def test_max_concurrent_allocs_always_from_workload_params(self, tmp_path): + """max-concurrent-allocs is not user-exposed even in OPEN — it + always tracks the per-option WORKLOAD_PARAMS value.""" + from mlpstorage_py.benchmarks.kvcache import WORKLOAD_PARAMS + bm = _make_run_benchmark(tmp_path) + out_open = bm._build_option_kvcache_args(3, is_closed=False) + out_closed = bm._build_option_kvcache_args(3, is_closed=True) + expected = str(WORKLOAD_PARAMS[3]['max-concurrent-allocs']) + assert out_open[out_open.index('--max-concurrent-allocs') + 1] == expected + assert out_closed[out_closed.index('--max-concurrent-allocs') + 1] == expected + + +class TestWrapperCommandForwardsPerOptionArgs: + """End-to-end check that the wrapper invocation built by _execute_run + contains the per-option model/num-users/etc., so kv-cache.py receives + them downstream of mlperf_wrapper.py.""" + + def _capture_first_cmd(self, bm, fake_agg_result): + executed = [] + def fake_execute(cmd, **kwargs): + executed.append(cmd) + return ('', '', 0) + with patch.object(bm, '_execute_command', side_effect=fake_execute), \ + patch.object(bm, '_interruptible_sleep'), \ + patch.object(bm, '_aggregate_option_results', return_value=fake_agg_result), \ + patch.object(bm, '_write_run_summary'), \ + patch.object(bm, 'write_metadata'): + bm._execute_run() + return executed[0] + + def _fake_agg(self): + return { + 'option': 1, 'aggregated_read_bandwidth_gbps': 0.0, + 'aggregated_write_bandwidth_gbps': 0.0, + 'aggregated_avg_throughput_tokens_per_sec': 0.0, + 'aggregated_storage_throughput_tokens_per_sec': 0.0, + 'aggregated_p95_latency_ms': 0.0, + 'rank_count': 1, 'trial_count': 1, + 'partial_failure': False, 'missing_files': [], 'cpu_tier_ranks': [], + } + + def test_closed_option1_wrapper_cmd_contains_8b_model(self, tmp_path): + bm = _make_run_benchmark(tmp_path) + bm.args.mode = 'closed' + # CLOSED requires trials=3 and inter_option_delay=20 (the fixture's + # values); overriding them would trigger CLOSED enforcement and + # short-circuit before any command is built. + cmd0 = self._capture_first_cmd(bm, self._fake_agg()) + assert '--model llama3.1-8b' in cmd0 + assert '--num-users 200' in cmd0 + assert '--max-concurrent-allocs 16' in cmd0 + + def test_open_user_model_appears_in_wrapper_cmd(self, tmp_path): + """Issue #498: user --model must reach the wrapper-built command.""" + bm = _make_run_benchmark(tmp_path) + bm.args.mode = 'open' + bm.args.model = 'llama3.1-70b-instruct' + bm.args.num_users = 33 + bm.args.trials = 1 + bm.args.inter_option_delay = 0 + cmd0 = self._capture_first_cmd(bm, self._fake_agg()) + assert '--model llama3.1-70b-instruct' in cmd0 + assert '--num-users 33' in cmd0 + + def test_wrapper_cmd_contains_config_path(self, tmp_path): + """mlpstorage now owns the config path; it must be passed to the wrapper.""" + bm = _make_run_benchmark(tmp_path) + bm.args.trials = 1 + bm.args.inter_option_delay = 0 + cmd0 = self._capture_first_cmd(bm, self._fake_agg()) + assert '--config' in cmd0 + + def _capture_all_cmds(self, bm, fake_agg_result): + """Capture every per-option/per-trial wrapper command built by _execute_run.""" + executed = [] + def fake_execute(cmd, **kwargs): + executed.append(cmd) + return ('', '', 0) + with patch.object(bm, '_execute_command', side_effect=fake_execute), \ + patch.object(bm, '_interruptible_sleep'), \ + patch.object(bm, '_aggregate_option_results', return_value=fake_agg_result), \ + patch.object(bm, '_write_run_summary'), \ + patch.object(bm, 'write_metadata'): + bm._execute_run() + return executed + + def test_open_option3_defaults_to_70b_when_user_did_not_override(self, tmp_path): + """OPEN with no --model override: option 3 must still receive the + MLPerf-mandated llama70b model from WORKLOAD_PARAMS[3]. Closes the + gap between _build_option_kvcache_args (covered) and the actual + wrapper command (was uncovered).""" + bm = _make_run_benchmark(tmp_path) + bm.args.mode = 'open' + bm.args.trials = 1 + bm.args.inter_option_delay = 0 + # Remove the fixture's model/num-users defaults so the fallback path + # in _build_option_kvcache_args is exercised — this mirrors the case + # where the OPEN user invoked the run without those flags. + for attr in ('model', 'num_users'): + if hasattr(bm.args, attr): + delattr(bm.args, attr) + cmds = self._capture_all_cmds(bm, self._fake_agg()) + assert len(cmds) == 3, f"Expected 3 commands (one per option), got {len(cmds)}" + # cmds[0] is option 1, cmds[1] is option 2, cmds[2] is option 3 + assert '--model llama3.1-8b' in cmds[0] + assert '--num-users 200' in cmds[0] + assert '--model llama3.1-8b' in cmds[1] + assert '--num-users 100' in cmds[1] + assert '--model llama3.1-70b-instruct' in cmds[2] + assert '--num-users 70' in cmds[2] + + def test_open_user_config_path_supersedes_wrapper_default(self, tmp_path): + """OPEN: when the user provides --config /custom/path.yaml, mlpstorage + forwards that exact path to the wrapper instead of the kv-cache.py + adjacent default. (CLOSED rejects --config — covered separately.)""" + bm = _make_run_benchmark(tmp_path) + bm.args.mode = 'open' + bm.args.config = '/custom/path/user.yaml' + bm.args.trials = 1 + bm.args.inter_option_delay = 0 + cmd0 = self._capture_first_cmd(bm, self._fake_agg()) + assert '--config /custom/path/user.yaml' in cmd0, \ + f"User config path missing from cmd: {cmd0}" + # And the wrapper-adjacent default must NOT also be present. + assert 'kv_cache_benchmark/config.yaml' not in cmd0, \ + f"Default config path leaked into cmd alongside user override: {cmd0}" diff --git a/tests/unit/test_drop_caches_timeout_cli.py b/tests/unit/test_drop_caches_timeout_cli.py new file mode 100644 index 00000000..fd146f90 --- /dev/null +++ b/tests/unit/test_drop_caches_timeout_cli.py @@ -0,0 +1,271 @@ +"""Tests for the --drop-caches-timeout-seconds CLI flag and env-var plumbing. + +This is the mlpstorage-side half of mlcommons/storage #487 — DLIO side lives at +mlcommons/DLIO_local_changes #28. mlpstorage exposes a CLI knob on the training +`run` subcommand only and surfaces the value to DLIO via the +DLIO_DROP_CACHES_TIMEOUT env var (and, for MPI launches, `-x` forwarding so the +value reaches the ranks). +""" + +import argparse +from unittest.mock import patch + +import pytest + +from mlpstorage_py.cli_parser import parse_arguments +from mlpstorage_py.config import EXIT_CODE + + +# Common training-run argv prefix used across tests. +_TRAINING_RUN_BASE = [ + 'mlpstorage', 'closed', 'training', 'unet3d', 'run', + '-cm', '64', '-at', 'b200', '-na', '4', '-dd', '/tmp', '-rd', '/tmp', 'file', +] + + +# --------------------------------------------------------------------------- +# Flag scope: training `run` only +# --------------------------------------------------------------------------- + +class TestFlagScope: + """The flag is registered on training `run` only — not on other subcommands or benchmarks.""" + + def test_training_run_accepts_flag(self): + argv = _TRAINING_RUN_BASE + ['--drop-caches-timeout-seconds', '300'] + with patch('sys.argv', argv): + args = parse_arguments() + assert args.drop_caches_timeout_seconds == 300 + + @pytest.mark.parametrize("argv_extra", [ + # training datagen + ['mlpstorage', 'closed', 'training', 'unet3d', 'datagen', + '-np', '4', '-dd', '/tmp', '-rd', '/tmp', 'file', + '--drop-caches-timeout-seconds', '300'], + # training datasize + ['mlpstorage', 'closed', 'training', 'unet3d', 'datasize', + '-cm', '64', '-at', 'b200', '-ma', '4', + '--drop-caches-timeout-seconds', '300'], + # training configview + ['mlpstorage', 'closed', 'training', 'unet3d', 'configview', + '-cm', '64', '-at', 'b200', '-na', '4', '-rd', '/tmp', 'file', + '--drop-caches-timeout-seconds', '300'], + ]) + def test_training_non_run_subcommands_reject_flag(self, argv_extra): + with patch('sys.argv', argv_extra): + with pytest.raises(SystemExit) as exc: + parse_arguments() + assert exc.value.code != 0 + + def test_checkpointing_rejects_flag(self): + argv = [ + 'mlpstorage', 'closed', 'checkpointing', 'run', + '-cm', '64', '-m', 'llama3-8b', '-np', '2', + '-cf', '/tmp/ckpt', '-rd', '/tmp', 'file', + '--drop-caches-timeout-seconds', '300', + ] + with patch('sys.argv', argv): + with pytest.raises(SystemExit) as exc: + parse_arguments() + assert exc.value.code != 0 + + def test_vectordb_rejects_flag(self): + argv = [ + 'mlpstorage', 'closed', 'vectordb', 'run', + '-rd', '/tmp', 'file', + '--drop-caches-timeout-seconds', '300', + ] + with patch('sys.argv', argv): + with pytest.raises(SystemExit) as exc: + parse_arguments() + assert exc.value.code != 0 + + def test_kvcache_rejects_flag(self): + argv = [ + 'mlpstorage', 'closed', 'kvcache', 'run', + '-rd', '/tmp', + '--drop-caches-timeout-seconds', '300', + ] + with patch('sys.argv', argv): + with pytest.raises(SystemExit) as exc: + parse_arguments() + assert exc.value.code != 0 + + +# --------------------------------------------------------------------------- +# Value validation: positive integers only +# --------------------------------------------------------------------------- + +class TestValueValidation: + """argparse rejects 0, negative, non-integer; accepts positive ints.""" + + @pytest.mark.parametrize("good", ['1', '30', '300', '7200', '86400']) + def test_accepts_positive_integers(self, good): + argv = _TRAINING_RUN_BASE + ['--drop-caches-timeout-seconds', good] + with patch('sys.argv', argv): + args = parse_arguments() + assert args.drop_caches_timeout_seconds == int(good) + + @pytest.mark.parametrize("bad", ['0', '-1', '-300', 'abc', '30.5', '']) + def test_rejects_invalid(self, bad): + argv = _TRAINING_RUN_BASE + ['--drop-caches-timeout-seconds', bad] + with patch('sys.argv', argv): + with pytest.raises(SystemExit) as exc: + parse_arguments() + assert exc.value.code != 0 + + def test_default_is_none_when_flag_omitted(self): + with patch('sys.argv', _TRAINING_RUN_BASE): + args = parse_arguments() + assert args.drop_caches_timeout_seconds is None + + +# --------------------------------------------------------------------------- +# Env-var plumbing: TrainingBenchmark sets DLIO_DROP_CACHES_TIMEOUT +# --------------------------------------------------------------------------- + +class TestEnvVarPlumbing: + """TrainingBenchmark.__init__ sets os.environ from args.drop_caches_timeout_seconds.""" + + def _build_namespace(self, **overrides): + """Minimal namespace to instantiate TrainingBenchmark (with cluster collector stubbed).""" + from argparse import Namespace + defaults = dict( + command='run', + model='unet3d', + accelerator_type='b200', + num_accelerators=4, + num_processes=4, + client_host_memory_in_gb=64, + data_dir='/tmp/data', + results_dir='/tmp/results', + data_access_protocol='file', + hosts=['localhost'], + num_client_hosts=1, + exec_type=None, # not MPI, just for namespace coverage + mpi_bin='mpirun', + mpi_params=None, + mpi_btl='auto', + oversubscribe=False, + allow_run_as_root=False, + debug=False, + verbose=False, + quiet=False, + stream_log_level='INFO', + dry_run=False, + verify_lockfile=None, + skip_validation=True, # avoid pre-run validation in unit test + params=None, + allow_invalid_params=False, + loops=1, + dlio_bin_path=None, + drop_caches_timeout_seconds=None, + mode='closed', + benchmark='training', + config_file=None, + ) + defaults.update(overrides) + return Namespace(**defaults) + + def test_env_var_set_when_flag_provided(self, monkeypatch): + """args.drop_caches_timeout_seconds=300 -> os.environ['DLIO_DROP_CACHES_TIMEOUT']='300'.""" + # Pre-clear so we know the test set it + monkeypatch.delenv('DLIO_DROP_CACHES_TIMEOUT', raising=False) + + # We exercise the small env-set block directly to avoid pulling in the + # full TrainingBenchmark.__init__ dependency graph (DLIO config files, + # cluster collection, etc). The block under test is three lines. + from argparse import Namespace + import os as _os + + args = Namespace(drop_caches_timeout_seconds=300) + timeout = getattr(args, 'drop_caches_timeout_seconds', None) + if timeout is not None: + _os.environ['DLIO_DROP_CACHES_TIMEOUT'] = str(timeout) + + assert _os.environ.get('DLIO_DROP_CACHES_TIMEOUT') == '300' + + def test_env_var_not_set_when_flag_omitted(self, monkeypatch): + """args.drop_caches_timeout_seconds=None -> os.environ unchanged.""" + monkeypatch.delenv('DLIO_DROP_CACHES_TIMEOUT', raising=False) + + from argparse import Namespace + import os as _os + + args = Namespace(drop_caches_timeout_seconds=None) + timeout = getattr(args, 'drop_caches_timeout_seconds', None) + if timeout is not None: + _os.environ['DLIO_DROP_CACHES_TIMEOUT'] = str(timeout) + + assert 'DLIO_DROP_CACHES_TIMEOUT' not in _os.environ + + def test_env_var_value_is_str_not_int(self, monkeypatch): + """Subprocess env vars must be strings.""" + monkeypatch.delenv('DLIO_DROP_CACHES_TIMEOUT', raising=False) + + from argparse import Namespace + import os as _os + + args = Namespace(drop_caches_timeout_seconds=42) + if args.drop_caches_timeout_seconds is not None: + _os.environ['DLIO_DROP_CACHES_TIMEOUT'] = str(args.drop_caches_timeout_seconds) + + assert isinstance(_os.environ['DLIO_DROP_CACHES_TIMEOUT'], str) + assert _os.environ['DLIO_DROP_CACHES_TIMEOUT'] == '42' + + +# --------------------------------------------------------------------------- +# MPI propagation: generate_dlio_command appends `-x DLIO_DROP_CACHES_TIMEOUT` +# --------------------------------------------------------------------------- + +class TestMpiForwarding: + """When the env var is set and exec_type is MPI, -x is added to the prefix.""" + + def test_x_flag_present_when_env_set(self, monkeypatch): + """Sanity-check the inline branch in generate_dlio_command without + instantiating the full DLIOBenchmark. We replicate the conditional.""" + monkeypatch.setenv('DLIO_DROP_CACHES_TIMEOUT', '300') + + import os as _os + mpi_prefix = "mpirun -n 4 -host h1:2,h2:2 --bind-to none --map-by node" + if 'DLIO_DROP_CACHES_TIMEOUT' in _os.environ: + mpi_prefix += " -x DLIO_DROP_CACHES_TIMEOUT" + + assert mpi_prefix.endswith("-x DLIO_DROP_CACHES_TIMEOUT") + + def test_x_flag_absent_when_env_unset(self, monkeypatch): + monkeypatch.delenv('DLIO_DROP_CACHES_TIMEOUT', raising=False) + + import os as _os + mpi_prefix = "mpirun -n 4 -host h1:2,h2:2 --bind-to none --map-by node" + if 'DLIO_DROP_CACHES_TIMEOUT' in _os.environ: + mpi_prefix += " -x DLIO_DROP_CACHES_TIMEOUT" + + assert '-x DLIO_DROP_CACHES_TIMEOUT' not in mpi_prefix + + +# --------------------------------------------------------------------------- +# End-to-end: argparse -> env var present in generate_dlio_command output +# --------------------------------------------------------------------------- + +class TestEndToEnd: + """The CLI flag, env var, and MPI prefix wire up correctly together.""" + + def test_full_pipeline(self, monkeypatch): + """argparse -> TrainingBenchmark.__init__ env-set -> generate_dlio_command -x.""" + import os as _os + + monkeypatch.delenv('DLIO_DROP_CACHES_TIMEOUT', raising=False) + + argv = _TRAINING_RUN_BASE + ['--drop-caches-timeout-seconds', '180'] + with patch('sys.argv', argv): + args = parse_arguments() + + # Replicate the env-set block from TrainingBenchmark.__init__ (avoids the + # full __init__ dependency graph: DLIO configs, cluster collection, etc). + if args.drop_caches_timeout_seconds is not None: + _os.environ['DLIO_DROP_CACHES_TIMEOUT'] = str(args.drop_caches_timeout_seconds) + + assert _os.environ['DLIO_DROP_CACHES_TIMEOUT'] == '180' + # The -x injection in generate_dlio_command keys off os.environ, which + # we've just populated; the test_x_flag_present_when_env_set case + # exercises that branch directly. diff --git a/tests/unit/test_mlperf_wrapper.py b/tests/unit/test_mlperf_wrapper.py index 30fa622d..228e3d48 100755 --- a/tests/unit/test_mlperf_wrapper.py +++ b/tests/unit/test_mlperf_wrapper.py @@ -2,8 +2,9 @@ Tests cover: - get_rank(): env var reading, OMPI precedence over PMI, invalid value fallback -- BASE_SEED and WORKLOAD_PARAMS constants -- main(): rank dir creation, effective seed, output flag, option params, config path +- BASE_SEED constant +- main(): rank dir creation, effective seed, output flag, forwarded args + pass-through, per-rank overrides come last (argparse-style override) """ import importlib.util @@ -60,32 +61,17 @@ class TestBaseConstants: def test_base_seed_is_42(self, wrapper_module): assert wrapper_module.BASE_SEED == 42 - -class TestOptionParams: - """Tests for WORKLOAD_PARAMS fixed MLPerf values.""" - - def test_option_keys_are_1_2_3(self, wrapper_module): - assert set(wrapper_module.WORKLOAD_PARAMS.keys()) == {1, 2, 3} - - def test_option1_model_is_8b(self, wrapper_module): - assert wrapper_module.WORKLOAD_PARAMS[1]['model'] == 'llama3.1-8b' - - def test_option3_model_is_70b(self, wrapper_module): - assert wrapper_module.WORKLOAD_PARAMS[3]['model'] == 'llama3.1-70b-instruct' - - def test_generation_mode_always_none(self, wrapper_module): - for opt in [1, 2, 3]: - assert wrapper_module.WORKLOAD_PARAMS[opt]['generation-mode'] == 'none' - - def test_option2_cpu_mem_gb_is_4(self, wrapper_module): - assert wrapper_module.WORKLOAD_PARAMS[2]['cpu-mem-gb'] == 4 + def test_wrapper_does_not_define_workload_params(self, wrapper_module): + """WORKLOAD_PARAMS lives in mlpstorage_py.benchmarks.kvcache now; + the wrapper must remain a dumb rank-aware launcher.""" + assert not hasattr(wrapper_module, 'WORKLOAD_PARAMS') class TestMain: """Tests for main() — invokes subprocess and exits.""" def _run_main(self, wrapper_module, monkeypatch, tmp_path, extra_argv=None, - ompi_rank=None): + ompi_rank=None, start_delay=0, end_delay=0): """Helper: sets up env, patches argv+subprocess, calls main(), captures cmd.""" monkeypatch.delenv('OMPI_COMM_WORLD_RANK', raising=False) monkeypatch.delenv('PMI_RANK', raising=False) @@ -96,10 +82,11 @@ def _run_main(self, wrapper_module, monkeypatch, tmp_path, extra_argv=None, cache_dir = tmp_path / 'cache' argv = [ 'mlperf_wrapper.py', - '--option', '1', - '--seed', '42', - '--base-output-dir', str(output_dir), - '--cache-dir', str(cache_dir), + '--rank-output-base', str(output_dir), + '--rank-cache-base', str(cache_dir), + '--seed-base', '42', + '--start-delay', str(start_delay), + '--end-delay', str(end_delay), ] if extra_argv: argv.extend(extra_argv) @@ -130,7 +117,7 @@ def test_creates_rank_cache_dir(self, wrapper_module, monkeypatch, tmp_path): def test_effective_seed_is_base_plus_rank(self, wrapper_module, monkeypatch, tmp_path): cmd, _, _ = self._run_main( wrapper_module, monkeypatch, tmp_path, - extra_argv=['--seed', '10'], + extra_argv=['--seed-base', '10'], ompi_rank=2, ) seed_idx = cmd.index('--seed') @@ -143,19 +130,44 @@ def test_output_flag_points_to_rank_dir(self, wrapper_module, monkeypatch, tmp_p assert 'rank_0' in output_val assert 'kvcache_results_' in output_val - def test_workload_params_injected_for_option1(self, wrapper_module, monkeypatch, tmp_path): - cmd, _, _ = self._run_main(wrapper_module, monkeypatch, tmp_path) - assert '--model' in cmd - model_idx = cmd.index('--model') - assert cmd[model_idx + 1] == 'llama3.1-8b' + def test_cache_dir_points_to_rank_cache_dir(self, wrapper_module, monkeypatch, tmp_path): + cmd, _, cache_dir = self._run_main(wrapper_module, monkeypatch, tmp_path) + cache_idx = cmd.index('--cache-dir') + cache_val = cmd[cache_idx + 1] + assert str(cache_dir / 'rank_0') == cache_val - def test_config_default_is_adjacent_config_yaml(self, wrapper_module, monkeypatch, tmp_path): - cmd, _, _ = self._run_main(wrapper_module, monkeypatch, tmp_path) - assert any('config.yaml' in arg for arg in cmd) - - def test_config_override_accepted(self, wrapper_module, monkeypatch, tmp_path): + def test_forwarded_args_passed_through_verbatim(self, wrapper_module, monkeypatch, tmp_path): + """Wrapper forwards unrecognized args to kv-cache.py without modification.""" cmd, _, _ = self._run_main( wrapper_module, monkeypatch, tmp_path, - extra_argv=['--config', '/custom/config.yaml'], + extra_argv=['--model', 'llama3.1-8b', '--num-users', '200', + '--config', '/tmp/cfg.yaml'], ) - assert '/custom/config.yaml' in cmd + assert '--model' in cmd + assert cmd[cmd.index('--model') + 1] == 'llama3.1-8b' + assert '--num-users' in cmd + assert cmd[cmd.index('--num-users') + 1] == '200' + assert '--config' in cmd + assert cmd[cmd.index('--config') + 1] == '/tmp/cfg.yaml' + + def test_per_rank_overrides_come_last(self, wrapper_module, monkeypatch, tmp_path): + """Per-rank --seed/--output/--cache-dir are appended after forwarded + args so argparse store-action picks them over any duplicates.""" + cmd, _, _ = self._run_main( + wrapper_module, monkeypatch, tmp_path, + extra_argv=['--seed', '999', '--cache-dir', '/forwarded/cache'], + ) + # Last --seed wins; wrapper's effective seed (42 + rank 0 = 42) + # must appear AFTER the forwarded --seed. + seed_positions = [i for i, x in enumerate(cmd) if x == '--seed'] + assert len(seed_positions) == 2 + assert cmd[seed_positions[-1] + 1] == '42' + + cache_positions = [i for i, x in enumerate(cmd) if x == '--cache-dir'] + assert len(cache_positions) == 2 + assert '/forwarded/cache' not in cmd[cache_positions[-1] + 1] + + def test_no_config_default_injected(self, wrapper_module, monkeypatch, tmp_path): + """Wrapper no longer falls back to an adjacent config.yaml — caller owns the path.""" + cmd, _, _ = self._run_main(wrapper_module, monkeypatch, tmp_path) + assert '--config' not in cmd diff --git a/tests/unit/test_version.py b/tests/unit/test_version.py index 5302efba..0f471b3a 100644 --- a/tests/unit/test_version.py +++ b/tests/unit/test_version.py @@ -14,11 +14,17 @@ def test_version_matches_pyproject(): assert mlpstorage_py.VERSION == declared -def test_version_lookup_uses_correct_distribution_name(): - """importlib.metadata lookup must succeed under the 'mlpstorage' dist name.""" - # Will raise PackageNotFoundError (not caught) if wrong name is used - pkg_version = importlib.metadata.version("mlpstorage") - assert pkg_version == mlpstorage_py.VERSION +def test_dist_name_matches_pyproject(): + """The dist name used by _resolve_version must equal pyproject's + project.name. Catches the regression where the in-code lookup string + drifts from the declared package name — independent of whether the + package is currently pip-installed in the test environment.""" + from mlpstorage_py import _DIST_NAME + + pyproject = pathlib.Path(__file__).parent.parent.parent / "pyproject.toml" + with open(pyproject, "rb") as f: + declared_name = tomllib.load(f)["project"]["name"] + assert _DIST_NAME == declared_name def test_version_fallback_reads_pyproject(monkeypatch): diff --git a/uv.lock b/uv.lock index 984d982d..044f1e86 100644 --- a/uv.lock +++ b/uv.lock @@ -237,7 +237,7 @@ wheels = [ [[package]] name = "dlio-benchmark" version = "3.0.2" -source = { git = "https://github.com/mlcommons/DLIO_local_changes.git?rev=7098cccdb1fe35ab6487705f33f229e3c125dd44#7098cccdb1fe35ab6487705f33f229e3c125dd44" } +source = { git = "https://github.com/mlcommons/DLIO_local_changes.git?branch=main#3667ed11e7ad36ca2c5def65c6cf2ae0559ec9f6" } dependencies = [ { name = "dgen-py", marker = "sys_platform == 'linux'" }, { name = "h5py", marker = "sys_platform == 'linux'" }, @@ -518,7 +518,7 @@ wheels = [ [[package]] name = "mlpstorage" -version = "3.0.16" +version = "3.0.17" source = { editable = "." } dependencies = [ { name = "dlio-benchmark", marker = "sys_platform == 'linux'" }, @@ -583,8 +583,8 @@ dev = [ [package.metadata] requires-dist = [ - { name = "dlio-benchmark", git = "https://github.com/mlcommons/DLIO_local_changes.git?rev=7098cccdb1fe35ab6487705f33f229e3c125dd44" }, - { name = "dlio-benchmark", marker = "extra == 'full'", git = "https://github.com/mlcommons/DLIO_local_changes.git?rev=7098cccdb1fe35ab6487705f33f229e3c125dd44" }, + { name = "dlio-benchmark", git = "https://github.com/mlcommons/DLIO_local_changes.git?branch=main" }, + { name = "dlio-benchmark", marker = "extra == 'full'", git = "https://github.com/mlcommons/DLIO_local_changes.git?branch=main" }, { name = "elasticsearch", marker = "extra == 'vectordb'", specifier = ">=8.0" }, { name = "elasticsearch", marker = "extra == 'vectordb-elasticsearch'", specifier = ">=8.0" }, { name = "minio", specifier = ">=7.2.20" },