diff --git a/src/endpoints_submission_cli/submissions/builder.py b/src/endpoints_submission_cli/submissions/builder.py index 31e1b36..b9a311c 100644 --- a/src/endpoints_submission_cli/submissions/builder.py +++ b/src/endpoints_submission_cli/submissions/builder.py @@ -406,7 +406,8 @@ def _write_pareto_entries( "requests_completed": warmup_cfg.get("requests_completed"), "data_source": warmup_cfg.get("data_source"), "concurrency": warmup_cfg.get("concurrency"), - "initialization_steps": warmup_cfg.get("initialization_steps"), + # Checker types this as a list; default to [] rather than null when absent. + "initialization_steps": warmup_cfg.get("initialization_steps") or [], } point_cfg: dict[str, Any] = { diff --git a/src/submission_checker/checker.py b/src/submission_checker/checker.py index e1d4535..cae873f 100644 --- a/src/submission_checker/checker.py +++ b/src/submission_checker/checker.py @@ -6,6 +6,7 @@ from __future__ import annotations +import json from typing import TYPE_CHECKING __all__ = ["SubmissionChecker"] @@ -33,12 +34,16 @@ load_accuracy_result, load_point_config, load_result_summary, + load_run_metadata, load_system_description, ) if TYPE_CHECKING: from pathlib import Path +# Absolute tolerance for the tps_utilization consistency check. +_TPS_UTILIZATION_ABS_TOL = 0.1 + class SubmissionChecker: """Validates an MLPerf Endpoints submission directory against §9.1 rules. @@ -111,6 +116,9 @@ def run(self) -> Report: for system_json in system_jsons: report.results.extend(self._check_system(system_json, pareto_dir)) + # Submission-wide: tps_utilization must match system_tps / max(system_tps). + report.results.extend(self._check_tps_utilization(pareto_dir)) + # §15: at least one model in the submission must have an accuracy/results.json. 
has_full_accuracy = any(True for _ in pareto_dir.rglob("accuracy/results.json")) if has_full_accuracy: @@ -134,6 +142,66 @@ def run(self) -> Report: return report + # ------------------------------------------------------------------ + # Submission-wide checks + # ------------------------------------------------------------------ + + def _check_tps_utilization(self, pareto_dir: Path) -> list[CheckResult]: + """Verify each run's ``tps_utilization`` equals ``system_tps / max(system_tps)``. + + ``tps_utilization`` normalises a run to the peak ``system_tps`` across the + whole submission, so this is a cross-run check. Stored values are compared + to the recomputed expectation within an absolute tolerance of + ``_TPS_UTILIZATION_ABS_TOL``. Structurally invalid metadata (missing or + non-numeric fields) is left to the per-file ``run-metadata-valid`` check. + """ + entries: list[tuple[Path, float, float]] = [] + for md_path in sorted(pareto_dir.rglob("run_metadata.json")): + try: + data = json.loads(md_path.read_text()) + except (OSError, ValueError): + continue + tps = data.get("system_tps") + util = data.get("tps_utilization") + if ( + isinstance(tps, (int, float)) + and not isinstance(tps, bool) + and isinstance(util, (int, float)) + and not isinstance(util, bool) + ): + entries.append((md_path, float(tps), float(util))) + + if not entries: + return [] + max_tps = max(tps for _, tps, _ in entries) + if max_tps <= 0: + return [] + + results: list[CheckResult] = [] + for md_path, tps, util in entries: + expected = tps / max_tps + if abs(util - expected) <= _TPS_UTILIZATION_ABS_TOL: + results.append( + _ok( + "tps-utilization", + f"tps_utilization {util:.4f} matches expected {expected:.4f}", + md_path, + "#8.1", + ) + ) + else: + results.append( + _err( + "tps-utilization", + f"tps_utilization {util} != expected {expected:.4f}" + f" (system_tps {tps} / submission max {max_tps};" + f" abs tol {_TPS_UTILIZATION_ABS_TOL})", + md_path, + "#8.1", + ) + ) + return results + # 
------------------------------------------------------------------ # Per-system orchestration # ------------------------------------------------------------------ @@ -290,6 +358,30 @@ def _check_model( ) ) + run_metadata_path = point_result_dir / "run_metadata.json" + if not run_metadata_path.exists(): + results.append( + _err( + "run-metadata-present", + f"Missing run_metadata.json for point_{config.concurrency}:" + f" {run_metadata_path.relative_to(self.submission_path)}", + run_metadata_path, + "#8.1", + ) + ) + else: + run_metadata, rm_results = load_run_metadata(run_metadata_path) + results.extend(rm_results) + if run_metadata is not None: + results.append( + _ok( + "run-metadata-valid", + f"run_metadata.json valid for point_{config.concurrency}", + run_metadata_path, + "#8.1", + ) + ) + summary, load_results = load_result_summary(summary_path) results.extend(load_results) if summary is None: diff --git a/src/submission_checker/models/__init__.py b/src/submission_checker/models/__init__.py index 453c786..b251b38 100644 --- a/src/submission_checker/models/__init__.py +++ b/src/submission_checker/models/__init__.py @@ -3,11 +3,13 @@ from .aggregate import MIN_QUERY_COUNT, ModelContext, PointResult from .file import ( AccuracyResult, + ConfigSummary, Division, NodeType, PercentileStats, PointConfig, PointSummary, + RunMetadata, RuntimeSettings, SystemAvailabilityStatus, SystemDescription, @@ -27,9 +29,11 @@ "NodeType", "PercentileStats", "PointConfig", + "ConfigSummary", "PointResult", "PointSummary", "RegionBounds", + "RunMetadata", "Regions", "Report", "RuntimeSettings", diff --git a/src/submission_checker/models/file/__init__.py b/src/submission_checker/models/file/__init__.py index bf38df6..02fb1b8 100644 --- a/src/submission_checker/models/file/__init__.py +++ b/src/submission_checker/models/file/__init__.py @@ -3,16 +3,19 @@ from .accuracy import AccuracyResult from .point_config import PointConfig, RuntimeSettings from .point_summary import 
"""Run metadata model — ``run_metadata.json`` schema and per-file validation.

``run_metadata.json`` is generated by the submission CLI for every measurement
point and lives at
``pareto/<system>/<model>/results/point_<concurrency>/run_metadata.json``.

Validation policy:
  * Every measurement field must be present **and non-null**.
  * ``config_summary`` is either a string of length >= 4 or a structured
    :class:`ConfigSummary` object.
  * Only ``config_summary_notes``, ``link_config``, and ``link_logs`` may be
    null — but their keys must still be present.
"""

from __future__ import annotations

from typing import Annotated

from pydantic import BaseModel, ConfigDict, StringConstraints

__all__ = ["ConfigSummary", "RunMetadata"]


class ConfigSummary(BaseModel):
    """Structured parallelism/config summary (the object form of ``config_summary``).

    Every declared field defaults to ``None``, so any subset of them may be
    given. Keys that are not declared here are accepted rather than rejected.
    """

    # extra="allow": undeclared keys in the object do not fail validation.
    model_config = ConfigDict(extra="allow")

    disaggregated: bool | None = None
    expert_parallel: int | None = None
    tensor_parallel: int | None = None
    pipeline_parallel: int | None = None
    data_parallel: int | None = None
    batch: int | None = None


# config_summary accepts either a structured object or a free-form string (len >= 4).
_ConfigSummaryStr = Annotated[str, StringConstraints(min_length=4)]


class RunMetadata(BaseModel):
    """Parsed contents of ``run_metadata.json``.

    All fields are required and non-null except ``config_summary_notes``,
    ``link_config``, and ``link_logs``, whose keys must be present but whose
    values may be null. None of the fields declares a default, which is what
    makes every key mandatory; the three nullable ones are typed
    ``str | None`` without a default for the same reason.
    """

    # extra="allow": keys beyond the ones declared below do not fail validation.
    model_config = ConfigDict(extra="allow")

    # Identity / configuration
    run_date: str
    node_config: str
    # Either the structured object form or a free-form string of length >= 4.
    config_summary: ConfigSummary | _ConfigSummaryStr
    config_summary_notes: str | None
    concurrency: int

    # Headline metrics
    system_tps: float
    tps_per_user: float
    # NOTE(review): the fixtures in this change set ttft equal to
    # measured_latency_ttft_p99 — confirm which statistic the headline reports.
    ttft: float
    qps: float
    # Cross-checked submission-wide against system_tps / max(system_tps).
    tps_utilization: float

    # Run accounting
    measured_total_output_tokens: int
    measured_run_duration: float
    measured_total_requests: int

    # Optional reference links (key required, value may be null)
    link_config: str | None
    link_logs: str | None

    # TTFT latencies
    measured_latency_ttft_min: float
    measured_latency_ttft_average: float
    measured_latency_ttft_p50: float
    measured_latency_ttft_p90: float
    measured_latency_ttft_p95: float
    measured_latency_ttft_p99: float
    measured_latency_ttft_p999: float
    measured_latency_ttft_max: float

    # TPOT latencies
    measured_latency_tpot_min: float
    measured_latency_tpot_average: float
    measured_latency_tpot_p50: float
    measured_latency_tpot_p90: float
    measured_latency_tpot_p95: float
    measured_latency_tpot_p99: float
    measured_latency_tpot_p999: float
    measured_latency_tpot_max: float

    # End-to-end request latencies
    measured_latency_request_min: float
    measured_latency_request_average: float
    measured_latency_request_p50: float
    measured_latency_request_p90: float
    measured_latency_request_p95: float
    measured_latency_request_p99: float
    measured_latency_request_p999: float
    measured_latency_request_max: float
@model_validator(mode="after")
def _require_core_or_vcpu_count(self) -> NodeType:
    """Reject a node that discloses neither a physical core count nor a vCPU count.

    Runs after field validation, so both attributes are already populated
    (possibly with ``None``).

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: If both ``host_processor_core_count`` and
            ``host_processor_vcpu_count`` are ``None``.
    """
    disclosed_counts = (
        self.host_processor_core_count,
        self.host_processor_vcpu_count,
    )
    # One disclosed count is enough; a value of 0 still counts as disclosed.
    if any(count is not None for count in disclosed_counts):
        return self
    raise ValueError(
        "node_types entry must specify host_processor_core_count or"
        " host_processor_vcpu_count"
    )
@@ -81,7 +91,7 @@ class SystemDescription(BaseModel): # Org / submission metadata submitter_org_names: str - submitter_contact: str | None = None + submitter_contact: str submission_id: str | None = None submission_date: str | None = None publish_date: str | None = None @@ -91,31 +101,31 @@ class SystemDescription(BaseModel): system_category: str system_availability_status: SystemAvailabilityStatus max_supported_concurrency: int - system_size: str | None = None - system_node_ensemble_count: int | None = None - system_node_ensemble_total: int | None = None + system_size: str + system_node_ensemble_count: int + system_node_ensemble_total: int serving_framework: str | None = None node_types: list[NodeType] # Division / model metadata division: Division - model_id: str | None = None + model_id: str model_name: str | None = None - model_precision: str | None = None - link_to_model: str | None = None + model_precision: str + link_to_model: str link_to_model_transformation: str | None = None model_notes: str | None = None # Dataset metadata - dataset_id: str | None = None - dataset_name: str | None = None - input_token_average: float | None = None - output_token_average: float | None = None - dataset_type: str | None = None - dataset_link: str | None = None + dataset_id: str + dataset_name: str + input_token_average: float + output_token_average: float + dataset_type: str + dataset_link: str # Accuracy - measured_accuracy_score: str | float | None = None + measured_accuracy_score: str | float @field_validator("division", mode="before") @classmethod diff --git a/src/submission_checker/models/loader.py b/src/submission_checker/models/loader.py index 329cc23..a28717e 100644 --- a/src/submission_checker/models/loader.py +++ b/src/submission_checker/models/loader.py @@ -9,6 +9,7 @@ "load_accuracy_result", "load_point_config", "load_result_summary", + "load_run_metadata", "load_system_description", ] @@ -17,7 +18,7 @@ import yaml from pydantic import ValidationError -from .file 
def load_run_metadata(path: Path) -> tuple[RunMetadata | None, list[CheckResult]]:
    """Read ``run_metadata.json`` from *path* and validate it as :class:`RunMetadata`.

    Returns:
        A ``(model, check_results)`` pair. On success the model is set and
        the list is empty. If the file cannot be loaded, the model is
        ``None`` and the list holds a single ``run-metadata-valid`` error
        carrying the loader's message. If schema validation fails, the model
        is ``None`` and the list is whatever ``_validation_errors`` builds
        from the pydantic error.
    """
    rule = "run-metadata-valid"

    payload, read_error = _load_json(path)
    if read_error:
        failure = CheckResult(
            rule=rule, message=read_error, severity=Severity.ERROR, path=path
        )
        return None, [failure]

    try:
        model = RunMetadata.model_validate(payload)
    except ValidationError as exc:
        return None, _validation_errors(exc, rule, path)
    return model, []
"measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/invalid_submission/pareto/bad_system/Llama-3_1-8B-Instruct/results/point_38/run_metadata.json b/test_submissions/invalid_submission/pareto/bad_system/Llama-3_1-8B-Instruct/results/point_38/run_metadata.json new file mode 100644 index 0000000..76150a2 --- /dev/null +++ b/test_submissions/invalid_submission/pareto/bad_system/Llama-3_1-8B-Instruct/results/point_38/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 38, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + 
"measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/invalid_submission/pareto/bad_system/Llama-3_1-8B-Instruct/results/point_88/run_metadata.json b/test_submissions/invalid_submission/pareto/bad_system/Llama-3_1-8B-Instruct/results/point_88/run_metadata.json new file mode 100644 index 0000000..3d7b416 --- /dev/null +++ b/test_submissions/invalid_submission/pareto/bad_system/Llama-3_1-8B-Instruct/results/point_88/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 88, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + 
"link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/invalid_submission/systems/bad_system.json b/test_submissions/invalid_submission/systems/bad_system.json index 57b930b..0565fa3 100644 --- a/test_submissions/invalid_submission/systems/bad_system.json +++ b/test_submissions/invalid_submission/systems/bad_system.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Invalid Corp", + "submitter_contact": "contact@example.com", "system_name": "bad_system", "system_category": "datacenter", "system_availability_status": "Available", @@ -19,8 +20,26 @@ "host_networking": "Ethernet 1Gb", "host_storage_capacity": "1 TB", "host_storage_type": "HDD", - "operating_system": "Ubuntu 20.04" + "operating_system": "Ubuntu 20.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": 
"https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "Llama-3_1-8B-Instruct", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_1024/run_metadata.json b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_1024/run_metadata.json new file mode 100644 index 0000000..e482ca9 --- /dev/null +++ b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_1024/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1024, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 
47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_128/run_metadata.json b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 
250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_16/run_metadata.json b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_16/run_metadata.json new file mode 100644 index 0000000..c4b57fd --- /dev/null +++ b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_16/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + 
"measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_2048/run_metadata.json b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_2048/run_metadata.json new file mode 100644 index 0000000..95fff71 --- /dev/null +++ b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_2048/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 2048, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + 
"measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_4/run_metadata.json b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_4/run_metadata.json new file mode 100644 index 0000000..9e2a5bb --- /dev/null +++ b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_4/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 4, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_512/run_metadata.json b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_64/run_metadata.json b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_a/pareto/sys_mi355x_8gpu/gpt-oss-120b/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_a/systems/sys_mi355x_8gpu.json 
b/test_submissions/sub_a/systems/sys_mi355x_8gpu.json index 249c3ad..07c4786 100644 --- a/test_submissions/sub_a/systems/sys_mi355x_8gpu.json +++ b/test_submissions/sub_a/systems/sys_mi355x_8gpu.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-A", + "submitter_contact": "contact@example.com", "system_name": "sys_mi355x_8gpu", "system_category": "datacenter", "system_availability_status": "Available", @@ -19,8 +20,26 @@ "host_networking": "InfiniBand HDR", "host_storage_capacity": "30 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Ubuntu 22.04" + "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "gpt-oss-120b", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_1024/run_metadata.json b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_1024/run_metadata.json new file mode 100644 index 0000000..e482ca9 --- /dev/null +++ b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_1024/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1024, + "system_tps": 306.94, + 
"tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_128/run_metadata.json b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + 
"measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_16/run_metadata.json b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_16/run_metadata.json new file mode 100644 index 0000000..c4b57fd --- /dev/null +++ b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_16/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + 
"measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_2048/run_metadata.json b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_2048/run_metadata.json new file mode 100644 index 0000000..95fff71 --- /dev/null +++ b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_2048/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 2048, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + 
"measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_4/run_metadata.json b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_4/run_metadata.json new file mode 100644 index 0000000..9e2a5bb --- /dev/null +++ b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_4/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 4, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + 
"measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_512/run_metadata.json b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + 
"measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_64/run_metadata.json b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_b/pareto/sys_mi355x_16gpu/gpt-oss-120b/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + 
"measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_b/systems/sys_mi355x_16gpu.json b/test_submissions/sub_b/systems/sys_mi355x_16gpu.json index 1526711..4ab1115 100644 --- a/test_submissions/sub_b/systems/sys_mi355x_16gpu.json +++ b/test_submissions/sub_b/systems/sys_mi355x_16gpu.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-B", + "submitter_contact": "contact@example.com", "system_name": "sys_mi355x_16gpu", "system_category": "datacenter", "system_availability_status": "Available", @@ -19,8 +20,26 @@ "host_networking": "InfiniBand HDR", "host_storage_capacity": "30 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Ubuntu 22.04" + "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + 
"system_size": "1 node", + "model_id": "gpt-oss-120b", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_128/run_metadata.json b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + 
"measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_16/run_metadata.json b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_16/run_metadata.json new file mode 100644 index 0000000..c4b57fd --- /dev/null +++ b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_16/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_256/run_metadata.json b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_256/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_32/run_metadata.json b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_32/run_metadata.json new file mode 100644 index 0000000..43654d8 --- /dev/null +++ b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_32/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 32, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_512/run_metadata.json b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_64/run_metadata.json b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_8/run_metadata.json b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_8/run_metadata.json new file mode 100644 index 0000000..77032d6 --- /dev/null +++ b/test_submissions/sub_c/pareto/sys_tpu_4chip/qwen3-coder-480b/results/point_8/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 8, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_c/systems/sys_tpu_4chip.json 
b/test_submissions/sub_c/systems/sys_tpu_4chip.json index 23c0d1e..10b32f1 100644 --- a/test_submissions/sub_c/systems/sys_tpu_4chip.json +++ b/test_submissions/sub_c/systems/sys_tpu_4chip.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-C", + "submitter_contact": "contact@example.com", "system_name": "sys_tpu_4chip", "system_category": "datacenter", "system_availability_status": "Available", @@ -19,8 +20,26 @@ "host_networking": "Ethernet 100Gb", "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Debian 11" + "operating_system": "Debian 11", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "qwen3-coder-480b", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_1024/run_metadata.json b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_1024/run_metadata.json new file mode 100644 index 0000000..e482ca9 --- /dev/null +++ b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_1024/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1024, + "system_tps": 306.94, + 
"tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_128/run_metadata.json b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + 
"measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_16/run_metadata.json b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_16/run_metadata.json new file mode 100644 index 0000000..c4b57fd --- /dev/null +++ b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_16/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + 
"measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_256/run_metadata.json b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_256/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + 
"measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_32/run_metadata.json b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_32/run_metadata.json new file mode 100644 index 0000000..43654d8 --- /dev/null +++ b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_32/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 32, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + 
"measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_512/run_metadata.json b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + 
"measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_64/run_metadata.json b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + 
"measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_8/run_metadata.json b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_8/run_metadata.json new file mode 100644 index 0000000..77032d6 --- /dev/null +++ b/test_submissions/sub_d/pareto/sys_tpu_8chip/qwen3-coder-480b/results/point_8/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 8, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + 
"measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_d/systems/sys_tpu_8chip.json b/test_submissions/sub_d/systems/sys_tpu_8chip.json index 69126e8..129c914 100644 --- a/test_submissions/sub_d/systems/sys_tpu_8chip.json +++ b/test_submissions/sub_d/systems/sys_tpu_8chip.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-D", + "submitter_contact": "contact@example.com", "system_name": "sys_tpu_8chip", "system_category": "datacenter", "system_availability_status": "Available", @@ -19,8 +20,26 @@ "host_networking": "Ethernet 100Gb", "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Debian 11" + "operating_system": "Debian 11", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "qwen3-coder-480b", + "system_node_ensemble_count": 1, + 
"system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_1/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_1/run_metadata.json new file mode 100644 index 0000000..baadd5b --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_1/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_1024/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_1024/run_metadata.json new file mode 100644 index 0000000..e482ca9 --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_1024/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1024, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_128/run_metadata.json 
b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_16/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_16/run_metadata.json new file mode 
100644 index 0000000..c4b57fd --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_16/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_2/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_2/run_metadata.json new file mode 100644 index 0000000..efc6fa2 --- /dev/null +++ 
b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_2/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 2, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_256/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_256/run_metadata.json @@ -0,0 +1,48 
@@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_32/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_32/run_metadata.json new file mode 100644 index 0000000..43654d8 --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_32/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + 
"disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 32, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_4/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_4/run_metadata.json new file mode 100644 index 0000000..9e2a5bb --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_4/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + 
"data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 4, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_512/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + 
"system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_64/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + 
"measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_8/run_metadata.json b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_8/run_metadata.json new file mode 100644 index 0000000..77032d6 --- /dev/null +++ b/test_submissions/sub_e/pareto/sys_gaudi_dp1/llama3-8b/results/point_8/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 8, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + 
"link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_e/systems/sys_gaudi_dp1.json b/test_submissions/sub_e/systems/sys_gaudi_dp1.json index 9efc03a..508f535 100644 --- a/test_submissions/sub_e/systems/sys_gaudi_dp1.json +++ b/test_submissions/sub_e/systems/sys_gaudi_dp1.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-E", + "submitter_contact": "contact@example.com", "system_name": "sys_gaudi_dp1", "system_category": "datacenter", "system_availability_status": "Available", @@ -19,8 +20,26 @@ "host_networking": "Ethernet 200Gb", "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Ubuntu 22.04" + "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + 
"dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "llama3-8b", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_1/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_1/run_metadata.json new file mode 100644 index 0000000..baadd5b --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_1/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + 
"measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_1024/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_1024/run_metadata.json new file mode 100644 index 0000000..e482ca9 --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_1024/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1024, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + 
"measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_128/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + 
"measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_16/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_16/run_metadata.json new file mode 100644 index 0000000..c4b57fd --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_16/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of 
file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_2/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_2/run_metadata.json new file mode 100644 index 0000000..efc6fa2 --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_2/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 2, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_256/run_metadata.json 
b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_256/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_32/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_32/run_metadata.json new file mode 
100644 index 0000000..43654d8 --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_32/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 32, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_4/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_4/run_metadata.json new file mode 100644 index 0000000..9e2a5bb --- /dev/null +++ 
b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_4/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 4, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_512/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_512/run_metadata.json @@ -0,0 +1,48 
@@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_64/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + 
"disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_8/run_metadata.json b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_8/run_metadata.json new file mode 100644 index 0000000..77032d6 --- /dev/null +++ b/test_submissions/sub_f/pareto/sys_gaudi_dp2/llama3-8b/results/point_8/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + 
"data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 8, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_f/systems/sys_gaudi_dp2.json b/test_submissions/sub_f/systems/sys_gaudi_dp2.json index d5fab8a..58f1549 100644 --- a/test_submissions/sub_f/systems/sys_gaudi_dp2.json +++ b/test_submissions/sub_f/systems/sys_gaudi_dp2.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-F", + "submitter_contact": "contact@example.com", "system_name": "sys_gaudi_dp2", "system_category": "datacenter", "system_availability_status": "Available", @@ -19,8 +20,26 @@ "host_networking": "Ethernet 200Gb", "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Ubuntu 22.04" + "operating_system": "Ubuntu 
22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "llama3-8b", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 2 } diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_1024/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_1024/run_metadata.json new file mode 100644 index 0000000..e482ca9 --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_1024/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1024, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 
4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_128/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + 
"measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_1536/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_1536/run_metadata.json new file mode 100644 index 0000000..b6224ab --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_1536/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1536, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + 
"measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_192/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_192/run_metadata.json new file mode 100644 index 0000000..8177cac --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_192/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 192, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + 
"measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_2048/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_2048/run_metadata.json new file mode 100644 index 0000000..95fff71 --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_2048/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 2048, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + 
"measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_256/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_256/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + 
"measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_384/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_384/run_metadata.json new file mode 100644 index 0000000..afe57ce --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_384/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 384, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + 
"measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_512/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at 
end of file diff --git a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_64/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_768/run_metadata.json b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_768/run_metadata.json new file mode 100644 index 0000000..5f7ce18 --- /dev/null +++ b/test_submissions/sub_g/pareto/sys_8gpu_vllm/llama3-70b/results/point_768/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 768, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_g/systems/sys_8gpu_vllm.json 
b/test_submissions/sub_g/systems/sys_8gpu_vllm.json index e1303c9..4bd3be8 100644 --- a/test_submissions/sub_g/systems/sys_8gpu_vllm.json +++ b/test_submissions/sub_g/systems/sys_8gpu_vllm.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-G", + "submitter_contact": "contact@example.com", "system_name": "sys_8gpu_vllm", "system_category": "datacenter", "system_availability_status": "Available", @@ -20,8 +21,26 @@ "host_networking": "InfiniBand NDR", "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Ubuntu 22.04" + "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "llama3-70b", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_1024/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_1024/run_metadata.json new file mode 100644 index 0000000..e482ca9 --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_1024/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1024, + "system_tps": 306.94, + "tps_per_user": 2.4, 
+ "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_128/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 
60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_1536/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_1536/run_metadata.json new file mode 100644 index 0000000..b6224ab --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_1536/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1536, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + 
"link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_192/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_192/run_metadata.json new file mode 100644 index 0000000..8177cac --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_192/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 192, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + 
"measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_2048/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_2048/run_metadata.json new file mode 100644 index 0000000..95fff71 --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_2048/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 2048, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + 
"measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_256/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_256/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + 
"measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_384/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_384/run_metadata.json new file mode 100644 index 0000000..afe57ce --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_384/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 384, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + 
"measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_512/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + 
"measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_64/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + 
"measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_768/run_metadata.json b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_768/run_metadata.json new file mode 100644 index 0000000..5f7ce18 --- /dev/null +++ b/test_submissions/sub_h/pareto/sys_8gpu_sglang/llama3-70b/results/point_768/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 768, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + 
"measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_h/systems/sys_8gpu_sglang.json b/test_submissions/sub_h/systems/sys_8gpu_sglang.json index bd93753..9c7a5ed 100644 --- a/test_submissions/sub_h/systems/sys_8gpu_sglang.json +++ b/test_submissions/sub_h/systems/sys_8gpu_sglang.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-H", + "submitter_contact": "contact@example.com", "system_name": "sys_8gpu_sglang", "system_category": "datacenter", "system_availability_status": "Available", @@ -20,8 +21,26 @@ "host_networking": "InfiniBand NDR", "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Ubuntu 22.04" + "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "llama3-70b", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_1/run_metadata.json 
b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_1/run_metadata.json new file mode 100644 index 0000000..baadd5b --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_1/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_128/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_128/run_metadata.json new file 
mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_16/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_16/run_metadata.json new file mode 100644 index 0000000..c4b57fd --- /dev/null +++ 
b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_16/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_2/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_2/run_metadata.json new file mode 100644 index 0000000..efc6fa2 --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_2/run_metadata.json @@ -0,0 
+1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 2, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_256/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_256/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { 
+ "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_32/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_32/run_metadata.json new file mode 100644 index 0000000..43654d8 --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_32/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + 
"data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 32, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_4/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_4/run_metadata.json new file mode 100644 index 0000000..9e2a5bb --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_4/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 4, + 
"system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_512/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 
1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_64/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + 
"measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_8/run_metadata.json b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_8/run_metadata.json new file mode 100644 index 0000000..77032d6 --- /dev/null +++ b/test_submissions/sub_i/pareto/sys_h200_8gpu/deepseek-r1/results/point_8/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 8, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + 
"measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_i/systems/sys_h200_8gpu.json b/test_submissions/sub_i/systems/sys_h200_8gpu.json index 7019ef2..343cb34 100644 --- a/test_submissions/sub_i/systems/sys_h200_8gpu.json +++ b/test_submissions/sub_i/systems/sys_h200_8gpu.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-I", + "submitter_contact": "contact@example.com", "system_name": "sys_h200_8gpu", "system_category": "datacenter", "system_availability_status": "Available", @@ -21,8 +22,26 @@ "host_networking": "InfiniBand NDR", "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Ubuntu 22.04" + "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + 
"dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "deepseek-r1", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_1024/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_1024/run_metadata.json new file mode 100644 index 0000000..e482ca9 --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_1024/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 1024, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + 
"measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_128/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_128/run_metadata.json new file mode 100644 index 0000000..6b77650 --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_128/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + 
"measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_16384/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_16384/run_metadata.json new file mode 100644 index 0000000..90ce11c --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_16384/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16384, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + 
"measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_2048/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_2048/run_metadata.json new file mode 100644 index 0000000..95fff71 --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_2048/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 2048, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ 
No newline at end of file diff --git a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_256/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_256/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_32/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_32/run_metadata.json new file mode 100644 index 0000000..43654d8 --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_32/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 32, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_4096/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_4096/run_metadata.json new file mode 100644 index 0000000..03fd222 --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_4096/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 4096, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_512/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_64/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_64/run_metadata.json new file mode 100644 index 0000000..78a354c --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_64/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 64, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git 
a/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_8192/run_metadata.json b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_8192/run_metadata.json new file mode 100644 index 0000000..f1069f4 --- /dev/null +++ b/test_submissions/sub_j/pareto/sys_gb300_72gpu/deepseek-r1/results/point_8192/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 8192, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/sub_j/systems/sys_gb300_72gpu.json 
b/test_submissions/sub_j/systems/sys_gb300_72gpu.json index 228bce2..9397b1a 100644 --- a/test_submissions/sub_j/systems/sys_gb300_72gpu.json +++ b/test_submissions/sub_j/systems/sys_gb300_72gpu.json @@ -1,5 +1,6 @@ { "submitter_org_names": "Submitter-J", + "submitter_contact": "contact@example.com", "system_name": "sys_gb300_72gpu", "system_category": "datacenter", "system_availability_status": "Available", @@ -20,8 +21,26 @@ "host_networking": "InfiniBand NDR400", "host_storage_capacity": "30 TB", "host_storage_type": "NVMe SSD", - "operating_system": "Ubuntu 22.04 (aarch64)" + "operating_system": "Ubuntu 22.04 (aarch64)", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "deepseek-r1", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_1000/run_metadata.json b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_1000/run_metadata.json new file mode 100644 index 0000000..6396ae6 --- /dev/null +++ b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_1000/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": 
null, + "concurrency": 1000, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_16/run_metadata.json b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_16/run_metadata.json new file mode 100644 index 0000000..c4b57fd --- /dev/null +++ b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_16/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 16, + "system_tps": 306.94, + 
"tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_256/run_metadata.json b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_256/run_metadata.json new file mode 100644 index 0000000..254a4e9 --- /dev/null +++ b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_256/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 256, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + 
"tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_38/run_metadata.json b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_38/run_metadata.json new file mode 100644 index 0000000..76150a2 --- /dev/null +++ b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_38/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 38, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 
60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_512/run_metadata.json b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_512/run_metadata.json new file mode 100644 index 0000000..509161e --- /dev/null +++ b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_512/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 512, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + 
"measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_768/run_metadata.json b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_768/run_metadata.json new file mode 100644 index 0000000..5f7ce18 --- /dev/null +++ b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_768/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 768, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, 
+ "link_logs": null, + "measured_latency_ttft_min": 10.624, + "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_88/run_metadata.json b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_88/run_metadata.json new file mode 100644 index 0000000..3d7b416 --- /dev/null +++ b/test_submissions/valid_standardized/pareto/acme_h100x8_001/llama3-70b/results/point_88/run_metadata.json @@ -0,0 +1,48 @@ +{ + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": { + "disaggregated": null, + "expert_parallel": 1, + "tensor_parallel": 1, + "pipeline_parallel": 1, + "data_parallel": 1, + "batch": null + }, + "config_summary_notes": null, + "concurrency": 88, + "system_tps": 306.94, + "tps_per_user": 2.4, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": null, + "link_logs": null, + "measured_latency_ttft_min": 10.624, 
+ "measured_latency_ttft_average": 32.218, + "measured_latency_ttft_p50": 30.0, + "measured_latency_ttft_p90": 280.0, + "measured_latency_ttft_p95": 300.0, + "measured_latency_ttft_p99": 312.5, + "measured_latency_ttft_p999": 316.572, + "measured_latency_ttft_max": 316.675, + "measured_latency_tpot_min": 3.076, + "measured_latency_tpot_average": 4.049, + "measured_latency_tpot_p50": 4.0, + "measured_latency_tpot_p90": 5.5, + "measured_latency_tpot_p95": 6.0, + "measured_latency_tpot_p99": 6.5, + "measured_latency_tpot_p999": 6.863, + "measured_latency_tpot_max": 6.919, + "measured_latency_request_min": 47.108, + "measured_latency_request_average": 271.979, + "measured_latency_request_p50": 250.0, + "measured_latency_request_p90": 600.0, + "measured_latency_request_p95": 620.0, + "measured_latency_request_p99": 640.0, + "measured_latency_request_p999": 643.234, + "measured_latency_request_max": 650.0 +} \ No newline at end of file diff --git a/test_submissions/valid_standardized/systems/acme_h100x8_001.json b/test_submissions/valid_standardized/systems/acme_h100x8_001.json index eb1aff3..5dc6e4d 100644 --- a/test_submissions/valid_standardized/systems/acme_h100x8_001.json +++ b/test_submissions/valid_standardized/systems/acme_h100x8_001.json @@ -1,5 +1,6 @@ { "submitter_org_names": "ACME Corp", + "submitter_contact": "contact@example.com", "system_name": "ACME H100x8 Node", "system_category": "datacenter", "system_availability_status": "Available", @@ -22,8 +23,26 @@ "host_storage_capacity": "20 TB", "host_storage_type": "NVMe SSD", "cooling": "Air-cooled", - "operating_system": "Ubuntu 22.04" + "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3e", + "host_network_card_count": "1x NIC", + "driver": "1.0", + "filesystem": "ext4" } ], - "division": "Standardized" + "division": "Standardized", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + 
"dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", + "system_size": "1 node", + "model_id": "llama3-70b", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1 } diff --git a/tests/endpoints_submission_cli/conftest.py b/tests/endpoints_submission_cli/conftest.py index 859be2c..58278bd 100644 --- a/tests/endpoints_submission_cli/conftest.py +++ b/tests/endpoints_submission_cli/conftest.py @@ -71,13 +71,26 @@ "host_storage_type": "NVMe SSD", "host_storage_capacity": "3.84 TB", "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "24x 64GB DDR5", + "accelerator_memory_type": "HBM3", + "driver": "550.54", + "filesystem": "ext4", } ], "division": "Standardized", + "system_size": "1x node, 8x H100", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1, "model_id": "llama3.1-8b", "model_name": "Llama 3.1 8B Instruct", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", "dataset_id": "cnn-dailymail", "dataset_name": "cnn_dailymail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", "measured_accuracy_score": 0.45, } diff --git a/tests/endpoints_submission_cli/submissions/test_builder.py b/tests/endpoints_submission_cli/submissions/test_builder.py index a41e0e2..5713c53 100644 --- a/tests/endpoints_submission_cli/submissions/test_builder.py +++ b/tests/endpoints_submission_cli/submissions/test_builder.py @@ -20,6 +20,7 @@ create_bundle_archive, extract_archive, ) +from submission_checker.models import Severity @pytest.mark.unit @@ -590,6 +591,95 @@ def test_built_runtime_settings_validate_against_checker( assert rs.runtime.dataloader_random_seed == 42 +@pytest.mark.unit +class TestBuilderCheckerContract: + """End-to-end: the builder's output must 
be consumable by the real checker. + + This is the coverage that was missing — builder unit tests only inspected + individual YAML keys, and checker unit tests only validated hand-built dicts, + so a structural mismatch (builder omitting a field the checker requires) slipped + through. These tests run the builder's actual output through the real checker. + """ + + def _complete_run_archive(self, run_folder: Path, tmp_path: Path, name: str = "run") -> Path: + """A run folder whose config has every field the checker needs to parse a point.""" + import shutil + + folder = tmp_path / name + shutil.copytree(run_folder, folder) + cfg = yaml.safe_load((folder / "config.yaml").read_text()) + cfg["settings"]["runtime"].update( + { + "min_duration_ms": 600_000, + "scheduler_random_seed": 42, + "dataloader_random_seed": 42, + } + ) + cfg["settings"]["client"] = {"stream_all_chunks": True} + cfg["settings"]["warmup"] = { + "enabled": True, + "salt": False, + "duration_s": 60.0, + "requests_issued": 100, + "requests_completed": 100, + "data_source": "warmup-ds", + "concurrency": 4, + } + (folder / "config.yaml").write_text(yaml.dump(cfg)) + archive = tmp_path / f"{name}.tar.gz" + with tarfile.open(archive, "w:gz") as tar: + tar.add(folder, arcname=name) + return archive + + def test_built_points_parse_through_checker_pointconfig( + self, run_folder: Path, tmp_path: Path + ) -> None: + """Every built point_*.yaml must validate against the checker's PointConfig.""" + from submission_checker.models.file.point_config import PointConfig + + archive = self._complete_run_archive(run_folder, tmp_path) + sub_dir = build_submission_folder( + [("run-001", archive)], "standardized", "available", tmp_path / "sub" + ) + points = list(sub_dir.rglob("point_*.yaml")) + assert points, "builder produced no point YAML" + for py in points: + data = yaml.safe_load(py.read_text()) + # Must not raise — this is exactly what failed before the runtime fix. 
+ cfg = PointConfig.model_validate(data, context={"yaml_path": py}) + assert cfg.runtime_settings.runtime.scheduler_random_seed == 42 + assert cfg.runtime_settings.runtime.dataloader_random_seed == 42 + # No per-point structural/seed errors for a compliant input. + errors = [r for r in cfg._check_results if r.severity == Severity.ERROR] + assert not errors, f"{py.name} unexpected errors: {[(r.rule, r.message) for r in errors]}" + + def test_full_checker_runs_without_parse_failures( + self, run_folder: Path, tmp_path: Path + ) -> None: + """Running the real SubmissionChecker on built output yields no parse/validation errors. + + Other rule failures (e.g. point-count) are acceptable here — we only assert that + nothing failed to *parse*, which is the builder↔checker contract. + """ + from submission_checker.checker import SubmissionChecker + + archive = self._complete_run_archive(run_folder, tmp_path) + sub_dir = build_submission_folder( + [("run-001", archive)], "standardized", "available", tmp_path / "sub" + ) + report = SubmissionChecker(sub_dir).run() + parse_failures = [ + r + for r in report.results + if r.severity == Severity.ERROR + and ("validation error" in r.message.lower() or "field required" in r.message.lower()) + ] + assert not parse_failures, ( + "checker hit parse/validation failures on builder output: " + f"{[(r.rule, r.message) for r in parse_failures]}" + ) + + @pytest.mark.unit class TestTruncateResponses: def _make_content(self, n_responses: int) -> bytes: diff --git a/tests/submission_checker/conftest.py b/tests/submission_checker/conftest.py index 45e9816..3d83d90 100644 --- a/tests/submission_checker/conftest.py +++ b/tests/submission_checker/conftest.py @@ -44,9 +44,14 @@ "accelerators_per_node": 8, "accelerator_memory_capacity": "80 GB", "host_networking": "InfiniBand", + "host_network_card_count": "4x NIC", "host_storage_type": "NVMe", "host_storage_capacity": "10 TB", "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 
64GB DDR5", + "accelerator_memory_type": "HBM3", + "driver": "550.54", + "filesystem": "ext4", } _M = 1024 @@ -64,13 +69,27 @@ def _system_desc( ) -> SystemDescription: return SystemDescription( submitter_org_names="Test Org", + submitter_contact="contact@example.com", system_name="test-sys", system_category="datacenter", system_availability_status="Available", max_supported_concurrency=max_supported_concurrency, + system_size="1 node", + system_node_ensemble_count=1, + system_node_ensemble_total=1, serving_framework="vLLM", node_types=[_NODE_TYPE], division=division, + model_id="test-model", + model_precision="FP16", + link_to_model="https://example.com/model", + dataset_id="cnn_dailymail", + dataset_name="CNN/DailyMail", + input_token_average=870.0, + output_token_average=128.0, + dataset_type="performance", + dataset_link="https://example.com/dataset", + measured_accuracy_score="38.7", **kwargs, ) diff --git a/tests/submission_checker/test_checker.py b/tests/submission_checker/test_checker.py index 86e4bec..085daf9 100644 --- a/tests/submission_checker/test_checker.py +++ b/tests/submission_checker/test_checker.py @@ -262,6 +262,7 @@ def test_metric_consistency(self, sub_j): _SYSTEM_DESC = { "submitter_org_names": "Test Org", + "submitter_contact": "contact@example.com", "system_name": "test-sys", "system_category": "datacenter", "system_availability_status": "Available", @@ -279,12 +280,30 @@ def test_metric_consistency(self, sub_j): "accelerators_per_node": 8, "accelerator_memory_capacity": "80 GB", "host_networking": "InfiniBand", + "host_network_card_count": "4x NIC", "host_storage_type": "NVMe", "host_storage_capacity": "10 TB", "operating_system": "Ubuntu 22.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM3", + "driver": "550.54", + "filesystem": "ext4", } ], "division": "Serviced", + "system_size": "1 node", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1, + "model_id": "test-model", + 
"model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", } _SUMMARY = { @@ -326,6 +345,34 @@ def _make_run_yaml(concurrency: int) -> dict: } +def _make_run_metadata(concurrency: int) -> dict: + """A fully-populated, valid run_metadata.json payload.""" + md = { + "run_date": "2026-01-26", + "node_config": "8x H100 test node", + "config_summary": "TP 1, PP 1, DP 1", + "config_summary_notes": None, + "concurrency": concurrency, + "system_tps": 306.94, + "tps_per_user": 2.40, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 1.0, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": None, + "link_logs": None, + } + for group in ("ttft", "tpot", "request"): + for stat, val in ( + ("min", 1.0), ("average", 2.0), ("p50", 2.0), ("p90", 3.0), + ("p95", 3.5), ("p99", 4.0), ("p999", 4.5), ("max", 5.0), + ): + md[f"measured_latency_{group}_{stat}"] = val + return md + + def _build_submission( root: Path, system_id: str = "test-sys", @@ -334,6 +381,7 @@ def _build_submission( write_runs: bool = True, write_results: bool = True, write_accuracy_json: bool = True, + write_run_metadata: bool = True, accuracy_data: dict | None = None, model: str = "llama3-70b", ) -> Path: @@ -363,6 +411,8 @@ def _build_submission( result_dir.mkdir(parents=True) (result_dir / "results_summary.json").write_text(json.dumps(_SUMMARY)) (result_dir / "config.yaml").write_text(yaml.dump({"concurrency": c})) + if write_run_metadata: + (result_dir / "run_metadata.json").write_text(json.dumps(_make_run_metadata(c))) # Write accuracy inside the first point only; checker scans all points if c == concs[0]: accuracy_dir = result_dir / "accuracy" @@ -452,6 +502,64 @@ def 
test_missing_result_log(self, tmp_path): report = _check(root) assert _errors(report, "result-file-present") + def test_missing_run_metadata_errors(self, tmp_path): + """run-metadata-present error when run_metadata.json is absent from a point.""" + root = _build_submission(tmp_path, write_run_metadata=False) + report = _check(root) + assert _errors(report, "run-metadata-present") + + def test_valid_run_metadata_ok(self, tmp_path): + """A fully-populated run_metadata.json produces a run-metadata-valid ok and no errors.""" + root = _build_submission(tmp_path) + report = _check(root) + assert not _errors(report, "run-metadata-present") + assert not _errors(report, "run-metadata-valid") + assert any(r.rule == "run-metadata-valid" for r in report.results) + + def test_invalid_run_metadata_errors(self, tmp_path): + """A null measurement in run_metadata.json produces a run-metadata-valid error.""" + root = _build_submission(tmp_path) + # Null out a measurement that must be non-null in every point's metadata. 
+ for md in root.rglob("run_metadata.json"): + data = json.loads(md.read_text()) + data["measured_latency_ttft_p99"] = None + md.write_text(json.dumps(data)) + report = _check(root) + assert _errors(report, "run-metadata-valid") + + def _set_tps(self, root, values: dict[int, tuple[float, float]]) -> None: + """Set (system_tps, tps_utilization) per concurrency in each run_metadata.json.""" + for md in root.rglob("run_metadata.json"): + data = json.loads(md.read_text()) + tps, util = values[data["concurrency"]] + data["system_tps"], data["tps_utilization"] = tps, util + md.write_text(json.dumps(data)) + + def test_tps_utilization_consistent_passes(self, tmp_path): + """Correctly normalised tps_utilization yields no tps-utilization error.""" + root = _build_submission(tmp_path, concurrencies=[16, 38]) + # max system_tps = 200 → expected utils 0.5 and 1.0 + self._set_tps(root, {16: (100.0, 0.5), 38: (200.0, 1.0)}) + report = _check(root) + assert not _errors(report, "tps-utilization") + assert any(r.rule == "tps-utilization" for r in report.results) + + def test_tps_utilization_within_tolerance_passes(self, tmp_path): + """A value off by < 0.1 from expected is accepted.""" + root = _build_submission(tmp_path, concurrencies=[16, 38]) + # expected for 16 is 0.5; 0.55 is within abs tol 0.1 + self._set_tps(root, {16: (100.0, 0.55), 38: (200.0, 1.0)}) + report = _check(root) + assert not _errors(report, "tps-utilization") + + def test_tps_utilization_out_of_tolerance_errors(self, tmp_path): + """A value off by > 0.1 from expected produces a tps-utilization error.""" + root = _build_submission(tmp_path, concurrencies=[16, 38]) + # expected for 16 is 0.5; 0.8 is off by 0.3 > 0.1 + self._set_tps(root, {16: (100.0, 0.8), 38: (200.0, 1.0)}) + report = _check(root) + assert _errors(report, "tps-utilization") + def test_missing_config_yaml(self, tmp_path): """result-file-present error when config.yaml is absent from a result dir.""" root = _build_submission(tmp_path) diff --git 
a/tests/submission_checker/test_models.py b/tests/submission_checker/test_models.py index ba2bec2..2e42776 100644 --- a/tests/submission_checker/test_models.py +++ b/tests/submission_checker/test_models.py @@ -95,20 +95,39 @@ def test_report_model_dump_includes_computed_fields(tmp_path: Path): "accelerators_per_node": 8, "accelerator_memory_capacity": "80 GB HBM2e", "host_networking": "InfiniBand EDR", + "host_network_card_count": "4x NIC", "host_storage_type": "NVMe SSD", "host_storage_capacity": "10 TB", "operating_system": "Ubuntu 20.04", + "host_memory_configuration": "8x 64GB DDR5", + "accelerator_memory_type": "HBM2e", + "driver": "550.54", + "filesystem": "ext4", } _BASE_FLAT = { "submitter_org_names": "Test Org", + "submitter_contact": "contact@example.com", "system_name": "test-node", "system_category": "datacenter", "system_availability_status": "Available", "max_supported_concurrency": 1024, + "system_size": "1 node", + "system_node_ensemble_count": 1, + "system_node_ensemble_total": 1, "serving_framework": "vLLM 0.4.0", "node_types": [_NODE_TYPE], "division": "Standardized", + "model_id": "test-model", + "model_precision": "FP16", + "link_to_model": "https://example.com/model", + "dataset_id": "cnn_dailymail", + "dataset_name": "CNN/DailyMail", + "input_token_average": 870.0, + "output_token_average": 128.0, + "dataset_type": "performance", + "dataset_link": "https://example.com/dataset", + "measured_accuracy_score": "38.7", } @@ -129,6 +148,45 @@ def test_system_description_accepts_vcpu_without_core_count(): assert sd.node_types[0].host_processor_core_count is None +def test_node_requires_core_or_vcpu_count(): + # A node disclosing neither physical cores nor vCPUs is rejected. 
+ node = {**_NODE_TYPE, "host_processor_core_count": None, "host_processor_vcpu_count": None} + with pytest.raises(ValidationError): + SystemDescription(**{**_BASE_FLAT, "node_types": [node]}) + + +@pytest.mark.parametrize( + "field", + [ + "submitter_contact", + "system_size", + "system_node_ensemble_count", + "model_id", + "model_precision", + "link_to_model", + "dataset_id", + "input_token_average", + "dataset_link", + "measured_accuracy_score", + ], +) +def test_required_system_fields_rejected_when_missing(field): + payload = {k: v for k, v in _BASE_FLAT.items() if k != field} + with pytest.raises(ValidationError): + SystemDescription(**payload) + + +@pytest.mark.parametrize( + "field", + ["host_memory_configuration", "accelerator_memory_type", "host_network_card_count", + "driver", "filesystem"], +) +def test_required_node_fields_rejected_when_missing(field): + node = {k: v for k, v in _NODE_TYPE.items() if k != field} + with pytest.raises(ValidationError): + SystemDescription(**{**_BASE_FLAT, "node_types": [node]}) + + def test_system_description_allows_extra_fields(): sd = SystemDescription(**{**_BASE_FLAT, "extra_top_level": "some value"}) assert sd.model_extra["extra_top_level"] == "some value" @@ -272,10 +330,11 @@ def test_dataset_metadata_invalid_string_raises(): # --------------------------------------------------------------------------- -# measured_accuracy_score coercion +# measured_accuracy_score # --------------------------------------------------------------------------- -def test_accuracy_empty_string_measured_score_coerced_to_none(): - sd = SystemDescription(**{**_BASE_FLAT, "measured_accuracy_score": ""}) - assert sd.measured_accuracy_score is None +def test_accuracy_empty_string_measured_score_rejected(): + # measured_accuracy_score is required; an empty string coerces to None and is rejected. 
+ with pytest.raises(ValidationError): + SystemDescription(**{**_BASE_FLAT, "measured_accuracy_score": ""}) diff --git a/tests/submission_checker/test_run_metadata.py b/tests/submission_checker/test_run_metadata.py new file mode 100644 index 0000000..dfe9ae1 --- /dev/null +++ b/tests/submission_checker/test_run_metadata.py @@ -0,0 +1,142 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 MLCommons +# SPDX-License-Identifier: Apache-2.0 +"""Unit tests for the run_metadata.json model and loader.""" + +from __future__ import annotations + +import json + +import pytest +from pydantic import ValidationError + +from submission_checker.models import RunMetadata, Severity +from submission_checker.models.loader import load_run_metadata + +# Fields whose key must be present but whose value may be null. +_NULLABLE_KEYS = ("config_summary_notes", "link_config", "link_logs") + +# Latency families × statistics that must all be present and non-null. +_LATENCY_FIELDS = [ + f"measured_latency_{group}_{stat}" + for group in ("ttft", "tpot", "request") + for stat in ("min", "average", "p50", "p90", "p95", "p99", "p999", "max") +] + + +def _valid() -> dict: + md = { + "run_date": "2026-01-26", + "node_config": "32x Xeon 6503P + 48x GB200", + "config_summary": "EP 1, PP 2, TP 4, DP 1, batch=24", + "config_summary_notes": None, + "concurrency": 128, + "system_tps": 306.94, + "tps_per_user": 2.40, + "ttft": 312.5, + "qps": 5.07, + "tps_utilization": 0.37, + "measured_total_output_tokens": 60557, + "measured_run_duration": 197.29, + "measured_total_requests": 1000, + "link_config": None, + "link_logs": None, + } + for f in _LATENCY_FIELDS: + md[f] = 1.0 + return md + + +@pytest.mark.unit +class TestRunMetadataModel: + def test_valid_payload(self): + md = RunMetadata.model_validate(_valid()) + assert md.concurrency == 128 + assert md.tps_utilization == 0.37 + + @pytest.mark.parametrize("field", _LATENCY_FIELDS) + def test_latency_must_not_be_null(self, field): + payload = {**_valid(), field: 
None} + with pytest.raises(ValidationError): + RunMetadata.model_validate(payload) + + @pytest.mark.parametrize("field", _LATENCY_FIELDS) + def test_latency_must_be_present(self, field): + payload = {k: v for k, v in _valid().items() if k != field} + with pytest.raises(ValidationError): + RunMetadata.model_validate(payload) + + @pytest.mark.parametrize( + "field", + ["concurrency", "system_tps", "tps_per_user", "ttft", "qps", "tps_utilization", + "measured_total_output_tokens", "measured_run_duration", "measured_total_requests", + "run_date", "node_config", "config_summary"], + ) + def test_core_fields_must_not_be_null(self, field): + payload = {**_valid(), field: None} + with pytest.raises(ValidationError): + RunMetadata.model_validate(payload) + + @pytest.mark.parametrize("field", _NULLABLE_KEYS) + def test_nullable_keys_accept_null(self, field): + md = RunMetadata.model_validate({**_valid(), field: None}) + assert getattr(md, field) is None + + @pytest.mark.parametrize("field", _NULLABLE_KEYS) + def test_nullable_keys_must_be_present(self, field): + # The key must exist even though the value may be null. 
+ payload = {k: v for k, v in _valid().items() if k != field} + with pytest.raises(ValidationError): + RunMetadata.model_validate(payload) + + def test_config_summary_string_min_length(self): + with pytest.raises(ValidationError): + RunMetadata.model_validate({**_valid(), "config_summary": "x"}) + md = RunMetadata.model_validate({**_valid(), "config_summary": "abcd"}) + assert md.config_summary == "abcd" + + def test_config_summary_as_object(self): + cs = { + "disaggregated": None, + "expert_parallel": None, + "tensor_parallel": 4, + "pipeline_parallel": 2, + "data_parallel": None, + "batch": None, + } + md = RunMetadata.model_validate({**_valid(), "config_summary": cs}) + assert md.config_summary.tensor_parallel == 4 + assert md.config_summary.pipeline_parallel == 2 + + def test_config_summary_object_all_null_ok(self): + cs = dict.fromkeys(("disaggregated", "expert_parallel", "tensor_parallel", "pipeline_parallel", "data_parallel", "batch")) + md = RunMetadata.model_validate({**_valid(), "config_summary": cs}) + assert md.config_summary.batch is None + + def test_config_summary_null_rejected(self): + with pytest.raises(ValidationError): + RunMetadata.model_validate({**_valid(), "config_summary": None}) + + +@pytest.mark.unit +class TestLoadRunMetadata: + def test_valid_file(self, tmp_path): + p = tmp_path / "run_metadata.json" + p.write_text(json.dumps(_valid())) + model, results = load_run_metadata(p) + assert model is not None + assert results == [] + + def test_invalid_file_returns_errors(self, tmp_path): + p = tmp_path / "run_metadata.json" + bad = {**_valid(), "measured_latency_tpot_p50": None} + p.write_text(json.dumps(bad)) + model, results = load_run_metadata(p) + assert model is None + assert results + assert all(r.rule == "run-metadata-valid" and r.severity == Severity.ERROR for r in results) + + def test_missing_file(self, tmp_path): + model, results = load_run_metadata(tmp_path / "absent.json") + assert model is None + assert len(results) == 1 + assert 
results[0].severity == Severity.ERROR