From 06f0e9002884b0fe775de7ca38dfcad20ce0d74a Mon Sep 17 00:00:00 2001 From: Alina Hwang Date: Tue, 23 Sep 2025 11:37:53 -0700 Subject: [PATCH 1/6] Adding Estimate NPU Latency pass and unit test --- olive/olive_config.json | 8 ++ .../onnx/vitis_ai/estimate_npu_latency.py | 85 +++++++++++++++++++ test/passes/onnx/test_estimate_npu_latency.py | 37 ++++++++ 3 files changed, 130 insertions(+) create mode 100644 olive/passes/onnx/vitis_ai/estimate_npu_latency.py create mode 100644 test/passes/onnx/test_estimate_npu_latency.py diff --git a/olive/olive_config.json b/olive/olive_config.json index aaf907fc8..84bfca4a6 100644 --- a/olive/olive_config.json +++ b/olive/olive_config.json @@ -70,6 +70,14 @@ "supported_quantization_encodings": [ ], "run_on_target": true }, + "EstimateNPULatency": { + "module_path": "olive.passes.onnx.vitis_ai.estimate_npu_latency.EstimateNPULatency", + "supported_providers": [ "*" ], + "supported_accelerators": [ "*" ], + "supported_precisions": [ "*" ], + "supported_algorithms": [ ], + "supported_quantization_encodings": [ ] + }, "ExtractAdapters": { "module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters", "supported_providers": [ "*" ], diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py new file mode 100644 index 000000000..259eb6c60 --- /dev/null +++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py @@ -0,0 +1,85 @@ +# +# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT +# + +import logging +from pathlib import Path + +from olive.hardware.accelerator import AcceleratorSpec +from olive.model import ONNXModelHandler +from olive.model.utils import resolve_onnx_path +from olive.passes import Pass +from olive.passes.onnx.common import model_proto_to_file, resave_model +from olive.passes.pass_config import BasePassConfig, PassConfigParam + +logger = logging.getLogger(__name__) + + +class EstimateNPULatency(Pass): + """Returns latency estimates for the model.""" + + @classmethod + def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]: + return { + "target_device": PassConfigParam( + type_=str, required=False, description="Target device type", default_value="stx" + ) + } + + @classmethod + def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool: + if not super().validate_config(config, accelerator_spec): + return False + + if config.target_device and config.target_device not in ["stx"]: + logger.warning("Unsupported target device type: %s", config.target_device) + return False + + return True + + def _run_for_config( + self, model: ONNXModelHandler, config: BasePassConfig, output_model_path: str + ) -> ONNXModelHandler: + perf_installed = True + try: + from estimator.config import EstimatorSettings + from estimator.run import run_perf_estimate + except ImportError: + perf_installed = False + logger.warning("Estimator module not found. Skipping EstimateNPULatency pass.") + + if not isinstance(model, ONNXModelHandler): + raise ValueError("Model must be an instance of ONNXModelHandler") + + input_model_path = model.model_path + + # Bypass if perf estimator package not installed + if perf_installed: + EstimatorSettings.model_path = f"{input_model_path}" + + # Override default parameters if specified + if config.target_device: + EstimatorSettings.target_device = config.target_device + EstimatorSettings.initialized = True + + logger.info( + "Running perf estimator for model path: %s and target device: %s", + input_model_path, + EstimatorSettings.target_device, + ) + + run_perf_estimate(EstimatorSettings) + + logger.info("Finish running perf estimator pass") + + # return the original model + output_model_path = Path(resolve_onnx_path(output_model_path, Path(model.model_path).name)) + has_external_data = resave_model(model.model_path, output_model_path) + onnx_model = model.load_model() + model_proto_to_file(onnx_model, output_model_path) + + return ONNXModelHandler( + model_path=output_model_path.parent if has_external_data else output_model_path, + onnx_file_name=output_model_path.name if has_external_data else None + ) diff --git a/test/passes/onnx/test_estimate_npu_latency.py b/test/passes/onnx/test_estimate_npu_latency.py new file mode 100644 index 000000000..48e314ea9 --- /dev/null +++ b/test/passes/onnx/test_estimate_npu_latency.py @@ -0,0 +1,37 @@ +# +# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT +# +import os +from pathlib import Path + +import onnx + +from olive.passes.olive_pass import create_pass_from_dict +from olive.passes.onnx.vitis_ai.estimate_npu_latency import EstimateNPULatency +from test.utils import get_onnx_model + + +class TestEstimateNPULatency: + """Test cases for EstimateNPULatency pass.""" + + def test_estimate_latency_basic(self, tmp_path): + """Test Perf Estimator call with automatic Olive version.""" + # Setup + input_model = get_onnx_model() + config = {} + p = create_pass_from_dict(EstimateNPULatency, config, disable_search=True) + output_folder = str(tmp_path / "onnx") + + # Execute + output_model = p.run(input_model, output_folder) + + # Assert we created output csv for latency results + estimates_csv = f"{os.path.dirname(input_model.model_path)}/concise_summary" + assert Path(estimates_csv).exists() + + # Assert + assert Path(output_model.model_path).exists() + # Load the output model and check graph name + onnx_model = onnx.load_model(output_model.model_path) + assert onnx_model.graph.name == "main_graph" From 9e35b6bf91f5d825279c9501d1473ad54c009a9b Mon Sep 17 00:00:00 2001 From: Alina Hwang Date: Wed, 1 Oct 2025 16:25:07 -0700 Subject: [PATCH 2/6] Fixed lint issues --- .../passes/onnx/vitis_ai/estimate_npu_latency.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py index 259eb6c60..2b8b2f553 100644 --- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py +++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py @@ -4,13 +4,10 @@ # import logging -from pathlib import Path from olive.hardware.accelerator import AcceleratorSpec from olive.model import ONNXModelHandler -from olive.model.utils import resolve_onnx_path from olive.passes import Pass -from olive.passes.onnx.common import model_proto_to_file, resave_model from olive.passes.pass_config import BasePassConfig, PassConfigParam logger = logging.getLogger(__name__) @@ -73,13 +70,5 @@ def _run_for_config( logger.info("Finish running perf estimator pass") - # return the original model - output_model_path = Path(resolve_onnx_path(output_model_path, Path(model.model_path).name)) - has_external_data = resave_model(model.model_path, output_model_path) - onnx_model = model.load_model() - model_proto_to_file(onnx_model, output_model_path) - - return ONNXModelHandler( - model_path=output_model_path.parent if has_external_data else output_model_path, - onnx_file_name=output_model_path.name if has_external_data else None - ) + # Return the original model as is + return model From d4308edf83156c305ed39e81910272c0b4cb8741 Mon Sep 17 00:00:00 2001 From: Alina Hwang Date: Thu, 9 Oct 2025 13:12:38 -0700 Subject: [PATCH 3/6] Addressed feedback --- olive/olive_config.json | 3 ++- olive/passes/onnx/vitis_ai/estimate_npu_latency.py | 3 ++- test/requirements-test.txt | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/olive/olive_config.json b/olive/olive_config.json index 84bfca4a6..4898da6bd 100644 --- a/olive/olive_config.json +++ b/olive/olive_config.json @@ -76,7 +76,8 @@ "supported_accelerators": [ "*" ], "supported_precisions": [ "*" ], "supported_algorithms": [ ], - "supported_quantization_encodings": [ ] + "supported_quantization_encodings": [ ], + "module_dependencies": [ "perf-estimator" ] }, "ExtractAdapters": { "module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters", diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py index 2b8b2f553..2cc454508 100644 --- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py +++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py @@ -44,7 +44,8 @@ def _run_for_config( from estimator.run import run_perf_estimate except ImportError: perf_installed = False - logger.warning("Estimator module not found. Skipping EstimateNPULatency pass.") + logger.error("Estimator module not found. Install perf-estimator package" + " and delete cached run before rerunning.") if not isinstance(model, ONNXModelHandler): raise ValueError("Model must be an instance of ONNXModelHandler") diff --git a/test/requirements-test.txt b/test/requirements-test.txt index a616d8e5a..2b4779eab 100644 --- a/test/requirements-test.txt +++ b/test/requirements-test.txt @@ -29,6 +29,7 @@ optimum[openvino]>=1.17.0, <=1.24 optuna pandas peft +perf-estimator plotly polygraphy>=0.49.22 psutil From 6a236080a3a793d9b016e860bff115ca6bbf9191 Mon Sep 17 00:00:00 2001 From: Alina Hwang Date: Thu, 9 Oct 2025 15:00:59 -0700 Subject: [PATCH 4/6] Fix EstimatorSettings bug --- olive/passes/onnx/vitis_ai/estimate_npu_latency.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py index 2cc454508..5b368d0e1 100644 --- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py +++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py @@ -59,7 +59,6 @@ def _run_for_config( # Override default parameters if specified if config.target_device: EstimatorSettings.target_device = config.target_device - EstimatorSettings.initialized = True logger.info( "Running perf estimator for model path: %s and target device: %s", @@ -68,7 +67,6 @@ def _run_for_config( ) run_perf_estimate(EstimatorSettings) - logger.info("Finish running perf estimator pass") # Return the original model as is From cc281adea10a97ab5b95d4aa48597e3abc256315 Mon Sep 17 00:00:00 2001 From: Alina Hwang Date: Fri, 10 Oct 2025 16:03:27 -0700 Subject: [PATCH 5/6] Fix lint error --- olive/passes/onnx/vitis_ai/estimate_npu_latency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py index 5b368d0e1..49345a9f6 100644 --- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py +++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py @@ -44,7 +44,7 @@ def _run_for_config( from estimator.run import run_perf_estimate except ImportError: perf_installed = False - logger.error("Estimator module not found. Install perf-estimator package" + logger.exception("Estimator module not found. Install perf-estimator package" " and delete cached run before rerunning.") if not isinstance(model, ONNXModelHandler): From cf4b6ea7ef820524d15ad766af1ec93131bff62b Mon Sep 17 00:00:00 2001 From: Alina Hwang Date: Mon, 13 Oct 2025 13:34:14 -0700 Subject: [PATCH 6/6] Fixed lint error --- olive/passes/onnx/vitis_ai/estimate_npu_latency.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py index 49345a9f6..cf5c99a66 100644 --- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py +++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py @@ -44,8 +44,9 @@ def _run_for_config( from estimator.run import run_perf_estimate except ImportError: perf_installed = False - logger.exception("Estimator module not found. Install perf-estimator package" - " and delete cached run before rerunning.") + logger.exception( + "Estimator module not found. Install perf-estimator package and delete cached run before rerunning." + ) if not isinstance(model, ONNXModelHandler): raise ValueError("Model must be an instance of ONNXModelHandler")