diff --git a/olive/olive_config.json b/olive/olive_config.json
index c5e5f9d58..cfddad455 100644
--- a/olive/olive_config.json
+++ b/olive/olive_config.json
@@ -70,6 +70,15 @@
         "supported_quantization_encodings": [ ],
         "run_on_target": true
     },
+    "EstimateNPULatency": {
+        "module_path": "olive.passes.onnx.vitis_ai.estimate_npu_latency.EstimateNPULatency",
+        "supported_providers": [ "*" ],
+        "supported_accelerators": [ "*" ],
+        "supported_precisions": [ "*" ],
+        "supported_algorithms": [ ],
+        "supported_quantization_encodings": [ ],
+        "module_dependencies": [ "perf-estimator" ]
+    },
     "ExtractAdapters": {
         "module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
         "supported_providers": [ "*" ],
diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
new file mode 100644
index 000000000..cf5c99a66
--- /dev/null
+++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
@@ -0,0 +1,74 @@
+#
+# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+import logging
+
+from olive.hardware.accelerator import AcceleratorSpec
+from olive.model import ONNXModelHandler
+from olive.passes import Pass
+from olive.passes.pass_config import BasePassConfig, PassConfigParam
+
+logger = logging.getLogger(__name__)
+
+
+class EstimateNPULatency(Pass):
+    """Estimate NPU latency for the model using the perf-estimator package."""
+
+    @classmethod
+    def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
+        return {
+            "target_device": PassConfigParam(
+                type_=str, required=False, description="Target device type", default_value="stx"
+            )
+        }
+
+    @classmethod
+    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
+        if not super().validate_config(config, accelerator_spec):
+            return False
+
+        if config.target_device and config.target_device not in ["stx"]:
+            logger.warning("Unsupported target device type: %s", config.target_device)
+            return False
+
+        return True
+
+    def _run_for_config(
+        self, model: ONNXModelHandler, config: BasePassConfig, output_model_path: str
+    ) -> ONNXModelHandler:
+        perf_installed = True
+        try:
+            from estimator.config import EstimatorSettings
+            from estimator.run import run_perf_estimate
+        except ImportError:
+            perf_installed = False
+            logger.exception(
+                "Estimator module not found. Install the perf-estimator package and delete the cached run before rerunning."
+            )
+
+        if not isinstance(model, ONNXModelHandler):
+            raise ValueError("Model must be an instance of ONNXModelHandler")
+
+        input_model_path = model.model_path
+
+        # Bypass estimation if the perf-estimator package is not installed
+        if perf_installed:
+            EstimatorSettings.model_path = str(input_model_path)
+
+            # Override default parameters if specified
+            if config.target_device:
+                EstimatorSettings.target_device = config.target_device
+
+            logger.info(
+                "Running perf estimator for model path: %s and target device: %s",
+                input_model_path,
+                EstimatorSettings.target_device,
+            )
+
+            run_perf_estimate(EstimatorSettings)
+            logger.info("Finished running perf estimator pass")
+
+        # Return the original model as is; this pass only reports latency estimates
+        return model
diff --git a/test/passes/onnx/test_estimate_npu_latency.py b/test/passes/onnx/test_estimate_npu_latency.py
new file mode 100644
index 000000000..48e314ea9
--- /dev/null
+++ b/test/passes/onnx/test_estimate_npu_latency.py
@@ -0,0 +1,37 @@
+#
+# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+import os
+from pathlib import Path
+
+import onnx
+
+from olive.passes.olive_pass import create_pass_from_dict
+from olive.passes.onnx.vitis_ai.estimate_npu_latency import EstimateNPULatency
+from test.utils import get_onnx_model
+
+
+class TestEstimateNPULatency:
+    """Test cases for the EstimateNPULatency pass."""
+
+    def test_estimate_latency_basic(self, tmp_path):
+        """Test a basic perf estimator run with the default pass config."""
+        # Setup
+        input_model = get_onnx_model()
+        config = {}
+        p = create_pass_from_dict(EstimateNPULatency, config, disable_search=True)
+        output_folder = str(tmp_path / "onnx")
+
+        # Execute
+        output_model = p.run(input_model, output_folder)
+
+        # Assert the latency summary was written next to the input model
+        estimates_csv = f"{os.path.dirname(input_model.model_path)}/concise_summary"
+        assert Path(estimates_csv).exists()
+
+        # Assert the original model is returned unchanged
+        assert Path(output_model.model_path).exists()
+        # Load the output model and check the graph name
+        onnx_model = onnx.load_model(output_model.model_path)
+        assert onnx_model.graph.name == "main_graph"
diff --git a/test/requirements-test.txt b/test/requirements-test.txt
index bdc1ddc7c..2e220ab2f 100644
--- a/test/requirements-test.txt
+++ b/test/requirements-test.txt
@@ -28,6 +28,7 @@ optimum-intel[openvino]>=1.17.0, <=1.24
 optuna
 pandas
 peft
+perf-estimator
 plotly
 polygraphy>=0.49.22
 psutil
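
For context, a minimal sketch of a workflow config entry that would exercise the new pass once perf-estimator is installed. The input_model block and the pass entry name ("estimate_npu_latency") are illustrative, and depending on the Olive version the target_device option may need to be nested under a "config" key inside the pass entry:

    {
        "input_model": { "type": "ONNXModel", "model_path": "model.onnx" },
        "passes": {
            "estimate_npu_latency": {
                "type": "EstimateNPULatency",
                "target_device": "stx"
            }
        }
    }

Since target_device defaults to "stx", which is also the only value accepted by validate_config, the option can be omitted entirely.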