From 06f0e9002884b0fe775de7ca38dfcad20ce0d74a Mon Sep 17 00:00:00 2001
From: Alina Hwang <alinah@amd.com>
Date: Tue, 23 Sep 2025 11:37:53 -0700
Subject: [PATCH 1/6] Add EstimateNPULatency pass and unit test

---
 olive/olive_config.json                       |  8 ++
 .../onnx/vitis_ai/estimate_npu_latency.py     | 85 +++++++++++++++++++
 test/passes/onnx/test_estimate_npu_latency.py | 37 ++++++++
 3 files changed, 130 insertions(+)
 create mode 100644 olive/passes/onnx/vitis_ai/estimate_npu_latency.py
 create mode 100644 test/passes/onnx/test_estimate_npu_latency.py

diff --git a/olive/olive_config.json b/olive/olive_config.json
index aaf907fc8..84bfca4a6 100644
--- a/olive/olive_config.json
+++ b/olive/olive_config.json
@@ -70,6 +70,14 @@
             "supported_quantization_encodings": [  ],
             "run_on_target": true
         },
+        "EstimateNPULatency": {
+            "module_path": "olive.passes.onnx.vitis_ai.estimate_npu_latency.EstimateNPULatency",
+            "supported_providers": [ "*" ],
+            "supported_accelerators": [ "*" ],
+            "supported_precisions": [ "*" ],
+            "supported_algorithms": [  ],
+            "supported_quantization_encodings": [  ]
+        },
         "ExtractAdapters": {
             "module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
             "supported_providers": [ "*" ],
diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
new file mode 100644
index 000000000..259eb6c60
--- /dev/null
+++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
@@ -0,0 +1,85 @@
+#
+# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+import logging
+from pathlib import Path
+
+from olive.hardware.accelerator import AcceleratorSpec
+from olive.model import ONNXModelHandler
+from olive.model.utils import resolve_onnx_path
+from olive.passes import Pass
+from olive.passes.onnx.common import model_proto_to_file, resave_model
+from olive.passes.pass_config import BasePassConfig, PassConfigParam
+
+logger = logging.getLogger(__name__)
+
+
+class EstimateNPULatency(Pass):
+    """Run the NPU performance estimator on an ONNX model and return the model."""
+
+    @classmethod
+    def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
+        return {
+            "target_device": PassConfigParam(
+                type_=str, required=False, description="Target device type", default_value="stx"
+            )
+        }
+
+    @classmethod
+    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
+        if not super().validate_config(config, accelerator_spec):
+            return False
+
+        if config.target_device and config.target_device not in ["stx"]:
+            logger.warning("Unsupported target device type: %s", config.target_device)
+            return False
+
+        return True
+
+    def _run_for_config(
+        self, model: ONNXModelHandler, config: BasePassConfig, output_model_path: str
+    ) -> ONNXModelHandler:
+        perf_installed = True
+        try:
+            from estimator.config import EstimatorSettings
+            from estimator.run import run_perf_estimate
+        except ImportError:
+            perf_installed = False
+            logger.warning("Estimator module not found. Skipping EstimateNPULatency pass.")
+
+        if not isinstance(model, ONNXModelHandler):
+            raise ValueError("Model must be an instance of ONNXModelHandler")
+
+        input_model_path = model.model_path
+
+        # Bypass if perf estimator package not installed
+        if perf_installed:
+            EstimatorSettings.model_path = f"{input_model_path}"
+
+            # Override default parameters if specified
+            if config.target_device:
+                EstimatorSettings.target_device = config.target_device
+            EstimatorSettings.initialized = True
+
+            logger.info(
+                "Running perf estimator for model path: %s and target device: %s",
+                input_model_path,
+                EstimatorSettings.target_device,
+            )
+
+            run_perf_estimate(EstimatorSettings)
+
+            logger.info("Finish running perf estimator pass")
+
+        # return the original model
+        output_model_path = Path(resolve_onnx_path(output_model_path, Path(model.model_path).name))
+        has_external_data = resave_model(model.model_path, output_model_path)
+        onnx_model = model.load_model()
+        model_proto_to_file(onnx_model, output_model_path)
+
+        return ONNXModelHandler(
+            model_path=output_model_path.parent if has_external_data else output_model_path,
+            onnx_file_name=output_model_path.name if has_external_data else None
+        )
diff --git a/test/passes/onnx/test_estimate_npu_latency.py b/test/passes/onnx/test_estimate_npu_latency.py
new file mode 100644
index 000000000..48e314ea9
--- /dev/null
+++ b/test/passes/onnx/test_estimate_npu_latency.py
@@ -0,0 +1,37 @@
+#
+# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+import os
+from pathlib import Path
+
+import onnx
+
+from olive.passes.olive_pass import create_pass_from_dict
+from olive.passes.onnx.vitis_ai.estimate_npu_latency import EstimateNPULatency
+from test.utils import get_onnx_model
+
+
+class TestEstimateNPULatency:
+    """Test cases for EstimateNPULatency pass."""
+
+    def test_estimate_latency_basic(self, tmp_path):
+        """Test that the pass runs the perf estimator with the default config and returns a loadable model."""
+        # Setup
+        input_model = get_onnx_model()
+        config = {}
+        p = create_pass_from_dict(EstimateNPULatency, config, disable_search=True)
+        output_folder = str(tmp_path / "onnx")
+
+        # Execute
+        output_model = p.run(input_model, output_folder)
+
+        # Assert the latency summary was written next to the input model
+        estimates_csv = f"{os.path.dirname(input_model.model_path)}/concise_summary"
+        assert Path(estimates_csv).exists()
+
+        # Assert the returned model file exists
+        assert Path(output_model.model_path).exists()
+        # Load the output model and check graph name
+        onnx_model = onnx.load_model(output_model.model_path)
+        assert onnx_model.graph.name == "main_graph"

From 9e35b6bf91f5d825279c9501d1473ad54c009a9b Mon Sep 17 00:00:00 2001
From: Alina Hwang <alinah@amd.com>
Date: Wed, 1 Oct 2025 16:25:07 -0700
Subject: [PATCH 2/6] Fix lint issues; return the input model instead of re-saving it

---
 .../passes/onnx/vitis_ai/estimate_npu_latency.py  | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
index 259eb6c60..2b8b2f553 100644
--- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
+++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
@@ -4,13 +4,10 @@
 #
 
 import logging
-from pathlib import Path
 
 from olive.hardware.accelerator import AcceleratorSpec
 from olive.model import ONNXModelHandler
-from olive.model.utils import resolve_onnx_path
 from olive.passes import Pass
-from olive.passes.onnx.common import model_proto_to_file, resave_model
 from olive.passes.pass_config import BasePassConfig, PassConfigParam
 
 logger = logging.getLogger(__name__)
@@ -73,13 +70,5 @@ def _run_for_config(
 
             logger.info("Finish running perf estimator pass")
 
-        # return the original model
-        output_model_path = Path(resolve_onnx_path(output_model_path, Path(model.model_path).name))
-        has_external_data = resave_model(model.model_path, output_model_path)
-        onnx_model = model.load_model()
-        model_proto_to_file(onnx_model, output_model_path)
-
-        return ONNXModelHandler(
-            model_path=output_model_path.parent if has_external_data else output_model_path,
-            onnx_file_name=output_model_path.name if has_external_data else None
-        )
+        # Return the original model as is
+        return model

From d4308edf83156c305ed39e81910272c0b4cb8741 Mon Sep 17 00:00:00 2001
From: Alina Hwang <alinah@amd.com>
Date: Thu, 9 Oct 2025 13:12:38 -0700
Subject: [PATCH 3/6] Declare perf-estimator dependency and log an error when it is missing

---
 olive/olive_config.json                            | 3 ++-
 olive/passes/onnx/vitis_ai/estimate_npu_latency.py | 3 ++-
 test/requirements-test.txt                         | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/olive/olive_config.json b/olive/olive_config.json
index 84bfca4a6..4898da6bd 100644
--- a/olive/olive_config.json
+++ b/olive/olive_config.json
@@ -76,7 +76,8 @@
             "supported_accelerators": [ "*" ],
             "supported_precisions": [ "*" ],
             "supported_algorithms": [  ],
-            "supported_quantization_encodings": [  ]
+            "supported_quantization_encodings": [  ],
+            "module_dependencies": [ "perf-estimator" ]
         },
         "ExtractAdapters": {
             "module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
index 2b8b2f553..2cc454508 100644
--- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
+++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
@@ -44,7 +44,8 @@ def _run_for_config(
             from estimator.run import run_perf_estimate
         except ImportError:
             perf_installed = False
-            logger.warning("Estimator module not found. Skipping EstimateNPULatency pass.")
+            logger.error("Estimator module not found. Install perf-estimator package"
+                        " and delete cached run before rerunning.")
 
         if not isinstance(model, ONNXModelHandler):
             raise ValueError("Model must be an instance of ONNXModelHandler")
diff --git a/test/requirements-test.txt b/test/requirements-test.txt
index a616d8e5a..2b4779eab 100644
--- a/test/requirements-test.txt
+++ b/test/requirements-test.txt
@@ -29,6 +29,7 @@ optimum[openvino]>=1.17.0, <=1.24
 optuna
 pandas
 peft
+perf-estimator
 plotly
 polygraphy>=0.49.22
 psutil

From 6a236080a3a793d9b016e860bff115ca6bbf9191 Mon Sep 17 00:00:00 2001
From: Alina Hwang <alinah@amd.com>
Date: Thu, 9 Oct 2025 15:00:59 -0700
Subject: [PATCH 4/6] Fix EstimatorSettings bug: stop setting EstimatorSettings.initialized

---
 olive/passes/onnx/vitis_ai/estimate_npu_latency.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
index 2cc454508..5b368d0e1 100644
--- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
+++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
@@ -59,7 +59,6 @@ def _run_for_config(
             # Override default parameters if specified
             if config.target_device:
                 EstimatorSettings.target_device = config.target_device
-            EstimatorSettings.initialized = True
 
             logger.info(
                 "Running perf estimator for model path: %s and target device: %s",
@@ -68,7 +67,6 @@ def _run_for_config(
             )
 
             run_perf_estimate(EstimatorSettings)
-
             logger.info("Finish running perf estimator pass")
 
         # Return the original model as is

From cc281adea10a97ab5b95d4aa48597e3abc256315 Mon Sep 17 00:00:00 2001
From: Alina Hwang <alinah@amd.com>
Date: Fri, 10 Oct 2025 16:03:27 -0700
Subject: [PATCH 5/6] Fix lint error: use logger.exception in the ImportError handler

---
 olive/passes/onnx/vitis_ai/estimate_npu_latency.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
index 5b368d0e1..49345a9f6 100644
--- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
+++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
@@ -44,7 +44,7 @@ def _run_for_config(
             from estimator.run import run_perf_estimate
         except ImportError:
             perf_installed = False
-            logger.error("Estimator module not found. Install perf-estimator package"
+            logger.exception("Estimator module not found. Install perf-estimator package"
                         " and delete cached run before rerunning.")
 
         if not isinstance(model, ONNXModelHandler):

From cf4b6ea7ef820524d15ad766af1ec93131bff62b Mon Sep 17 00:00:00 2001
From: Alina Hwang <alinah@amd.com>
Date: Mon, 13 Oct 2025 13:34:14 -0700
Subject: [PATCH 6/6] Fix lint error: reformat the ImportError logger.exception call

---
 olive/passes/onnx/vitis_ai/estimate_npu_latency.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
index 49345a9f6..cf5c99a66 100644
--- a/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
+++ b/olive/passes/onnx/vitis_ai/estimate_npu_latency.py
@@ -44,8 +44,9 @@ def _run_for_config(
             from estimator.run import run_perf_estimate
         except ImportError:
             perf_installed = False
-            logger.exception("Estimator module not found. Install perf-estimator package"
-                        " and delete cached run before rerunning.")
+            logger.exception(
+                "Estimator module not found. Install perf-estimator package and delete cached run before rerunning."
+            )
 
         if not isinstance(model, ONNXModelHandler):
             raise ValueError("Model must be an instance of ONNXModelHandler")