diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b81df38f..dd4d38b26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel - ✨ Improved the MDP and extended the RL predictor's action/state space (expanded observation vector, support for stochastic passes, wrapped stochastic actions) ([#449]) ([**@Shaobo-Zhou**]) - ✨ Added AIRouting and new optimization actions (KAKDecomposition, ElidePermutations) to the RL action set ([#449]) ([**@Shaobo-Zhou**]) - ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**]) +- ✨ Added CompilationTracer that collects compilation information and exports it to a JSON file ([#641]) ([**@linus-hologram**]) - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**]) - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**]) @@ -65,6 +66,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool [#393]: https://github.com/munich-quantum-toolkit/predictor/pull/393 [#385]: https://github.com/munich-quantum-toolkit/predictor/pull/385 [#360]: https://github.com/munich-quantum-toolkit/predictor/pull/360 +[#641]: https://github.com/munich-quantum-toolkit/predictor/pull/641 @@ -74,6 +76,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool [**@denialhaag**]: https://github.com/denialhaag [**@bachase**]: https://github.com/bachase [**@Shaobo-Zhou**]: https://github.com/Shaobo-Zhou +[**@linus-hologram**]: https://github.com/linus-hologram diff --git a/src/mqt/predictor/qcompile.py b/src/mqt/predictor/qcompile.py index d65be982e..a1f8240f0 100644 --- a/src/mqt/predictor/qcompile.py +++ b/src/mqt/predictor/qcompile.py @@ -16,6 +16,8 @@ from mqt.predictor.rl import rl_compile if TYPE_CHECKING: + from pathlib import Path + from qiskit import QuantumCircuit from mqt.predictor.reward import figure_of_merit @@ -24,16 +26,20 @@ 
def qcompile( qc: QuantumCircuit, figure_of_merit: figure_of_merit = "expected_fidelity", + tracer_output_path: str | Path | None = None, ) -> tuple[QuantumCircuit, list[str], str]: """Compiles a given quantum circuit to a device with the highest predicted figure of merit. Arguments: qc: The quantum circuit to be compiled. figure_of_merit: The figure of merit to be used for compilation. Defaults to "expected_fidelity". + tracer_output_path: If provided, enables compiler tracing and exports the JSON log to this path/directory. Returns: A tuple containing the compiled quantum circuit, the compilation information, and the name of the device used for compilation. """ predicted_device = predict_device_for_figure_of_merit(qc, figure_of_merit=figure_of_merit) - res = rl_compile(qc, device=predicted_device, figure_of_merit=figure_of_merit) + res = rl_compile( + qc, device=predicted_device, figure_of_merit=figure_of_merit, tracer_output_path=tracer_output_path + ) return *res, predicted_device diff --git a/src/mqt/predictor/rl/actions.py b/src/mqt/predictor/rl/actions.py index 0e32d63d2..0e0e31be6 100644 --- a/src/mqt/predictor/rl/actions.py +++ b/src/mqt/predictor/rl/actions.py @@ -10,7 +10,6 @@ from __future__ import annotations -import os import sys import warnings from collections import defaultdict @@ -18,8 +17,6 @@ from enum import Enum from typing import TYPE_CHECKING -from bqskit import MachineModel -from bqskit import compile as bqskit_compile from pytket.architecture import Architecture from pytket.passes import ( CliffordSimp, @@ -78,7 +75,6 @@ from mqt.predictor.rl.parsing import ( PreProcessTKETRoutingAfterQiskitLayout, - get_bqskit_native_gates, ) IS_WIN_PY313 = sys.platform == "win32" and sys.version_info[:2] == (3, 13) @@ -366,7 +362,7 @@ def remove_action(name: str) -> None: ) ) -#register_action( +# register_action( # DeviceDependentAction( # "BQSKitO2", # CompilationOrigin.BQSKIT, @@ -380,7 +376,7 @@ def remove_action(name: str) -> None: # num_workers=-1, 
# ), # ) -#) +# ) register_action( DeviceDependentAction( @@ -526,7 +522,7 @@ def remove_action(name: str) -> None: ) ) -#register_action( +# register_action( # DeviceDependentAction( # "BQSKitMapping", # CompilationOrigin.BQSKIT, @@ -548,7 +544,7 @@ def remove_action(name: str) -> None: # ) # ), # ) -#) +# ) register_action( DeviceDependentAction( @@ -561,7 +557,7 @@ def remove_action(name: str) -> None: ) ) -#register_action( +# register_action( # DeviceDependentAction( # "BQSKitSynthesis", # CompilationOrigin.BQSKIT, @@ -578,7 +574,7 @@ def remove_action(name: str) -> None: # ) # ), # ) -#) +# ) register_action( DeviceIndependentAction( diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py index fe4b400ef..ee9321f87 100644 --- a/src/mqt/predictor/rl/predictor.py +++ b/src/mqt/predictor/rl/predictor.py @@ -40,6 +40,7 @@ def __init__( mdp: str = "paper", path_training_circuits: Path | None = None, logger_level: int = logging.INFO, + tracer_output_path: str | Path | None = None, ) -> None: """Initializes the Predictor object.""" logger.setLevel(logger_level) @@ -49,6 +50,7 @@ def __init__( device=device, mdp=mdp, path_training_circuits=path_training_circuits, + tracer_output_path=tracer_output_path, ) self.device_name = device.description self.figure_of_merit = figure_of_merit @@ -178,6 +180,7 @@ def rl_compile( device: Target | None, figure_of_merit: figure_of_merit | None = "expected_fidelity", predictor_singleton: Predictor | None = None, + tracer_output_path: str | Path | None = None, ) -> tuple[QuantumCircuit, list[str]]: """Compiles a given quantum circuit to a device optimizing for the given figure of merit. @@ -186,6 +189,7 @@ def rl_compile( device: The device to compile to. figure_of_merit: The figure of merit to be used for compilation. Defaults to "expected_fidelity". predictor_singleton: A predictor object that is used for compilation to reduce compilation time when compiling multiple quantum circuits. 
If None, a new predictor object is created. Defaults to None. + tracer_output_path: If provided, enables compiler tracing and exports the JSON log to the specified path. Returns: A tuple containing the compiled quantum circuit and the compilation information. If compilation fails, False is returned. @@ -200,8 +204,14 @@ def rl_compile( if device is None: msg = "device must not be None if predictor_singleton is None." raise ValueError(msg) - predictor = Predictor(figure_of_merit=figure_of_merit, device=device) - else: - predictor = predictor_singleton - - return predictor.compile_as_predicted(qc) + predictor = Predictor(figure_of_merit=figure_of_merit, device=device, tracer_output_path=tracer_output_path) + return predictor.compile_as_predicted(qc) + + # use singleton and restore tracer path afterward + predictor = predictor_singleton + original_tracer_output_path = predictor.env.tracer_output_path + predictor.env.tracer_output_path = tracer_output_path + try: + return predictor.compile_as_predicted(qc) + finally: + predictor.env.tracer_output_path = original_tracer_output_path diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py index 457a9a1ff..ecc481063 100644 --- a/src/mqt/predictor/rl/predictorenv.py +++ b/src/mqt/predictor/rl/predictorenv.py @@ -14,10 +14,13 @@ from pathlib import Path from typing import TYPE_CHECKING, Any +import numpy as np + if TYPE_CHECKING: from collections.abc import Callable from bqskit import Circuit + from numpy.typing import NDArray from pytket._tket.passes import BasePass as TketBasePass from pytket.circuit import Node from qiskit.passmanager.base_tasks import Task @@ -34,7 +37,6 @@ from math import isclose from typing import cast -import numpy as np from bqskit.ext import bqskit_to_qiskit, qiskit_to_bqskit from gymnasium import Env from gymnasium.spaces import Box, Dict, Discrete @@ -86,6 +88,11 @@ postprocess_vf2postlayout, prepare_noise_data, ) +from mqt.predictor.rl.tracer import ( + 
CompilationTracer, + FigureOfMeritMetrics, + FOMMetric, +) from mqt.predictor.utils import calc_supermarq_features, get_openqasm_gates_for_rl logger = logging.getLogger("mqt-predictor") @@ -131,6 +138,7 @@ def __init__( path_training_circuits: Path | None = None, reward_scale: float = 1.0, no_effect_penalty: float = -0.001, + tracer_output_path: str | Path | None = None, ) -> None: """Initializes the PredictorEnv object. @@ -145,6 +153,7 @@ def __init__( path_training_circuits: The path to the training circuits folder. Defaults to None, which uses the default path. reward_scale: Scaling factor for rewards/penalties proportional to fidelity changes. no_effect_penalty: Step penalty applied when an action does not change the circuit (no-op). + tracer_output_path: Whether to enable compilation tracing. If provided, this will export a JSON file at the end of the compilation process. Defaults to None. Raises: ValueError: If the reward function is "estimated_success_probability" and no calibration data is available for the device or if the reward function is "estimated_hellinger_distance" and no trained model is available for the device. 
@@ -163,6 +172,9 @@ def __init__( self.actions_structure_preserving_indices = [] # Actions that preserves the mapping and native gates self.used_actions: list[str] = [] self.device = device + self.tracer_output_path = tracer_output_path + self.hellinger_model = None + self.tracer = None logger.info("MDP: " + mdp) self.mdp = mdp @@ -245,8 +257,8 @@ def __init__( self.readout_err: dict[Node, float] | None = None self.reward_scale = reward_scale self.no_effect_penalty = no_effect_penalty - self.prev_reward: float | None = None - self.prev_reward_kind: str | None = None + self.prev_reward: float = 0.0 + self.prev_reward_kind: str = "unknown" self.episode_count = 0 self.current_circuit_name = "" self.err_by_gate: dict[str, float] = {} @@ -289,7 +301,86 @@ def export_circuit(self, qc: QuantumCircuit | None = None) -> QuantumCircuit: exported._layout = self.layout # noqa: SLF001 return exported - def _log_step_reward(self, step_index: int, action_name: str, reward_val: float, done: bool) -> None: + def _collect_tracer_data( + self, + step_index: int, + action_name: str, + reward_val: float, + fom_value: float, + fom_kind: str, + feature_vector: dict[str, int | NDArray[np.float32]], + done: bool, + ) -> None: + """Collects the current compilation state and sends it to the tracer.""" + if self.tracer is not None and self.tracer_output_path is not None: + synthesized, laid_out, routed = self._get_compilation_state_flags() + + # Collect figures of merit + hd_metric: FOMMetric | None = None + cd_metric: FOMMetric + ef_metric: FOMMetric + esp_metric: FOMMetric | None = None + + if self.reward_function == "expected_fidelity": + ef_metric = FOMMetric(value=fom_value, kind=fom_kind) + else: + val, kind = self.calculate_expected_fidelity(qc=self.state, mode="auto") + ef_metric = FOMMetric(value=val, kind=kind) + + if self.reward_function == "estimated_success_probability": + esp_metric = FOMMetric(value=fom_value, kind=fom_kind) + elif esp_data_available(self.device): + val, kind = 
self.calculate_estimated_success_probability(qc=self.state, mode="auto") + esp_metric = FOMMetric(value=val, kind=kind) + + if self.reward_function == "critical_depth": + cd_metric = FOMMetric(value=fom_value, kind=fom_kind) + else: + val, kind = self.calculate_critical_depth(qc=self.state) + cd_metric = FOMMetric(value=val, kind=kind) + + if self.reward_function == "estimated_hellinger_distance": + hd_metric = FOMMetric(value=fom_value, kind=fom_kind) + elif self.hellinger_model is not None: + val, kind = self.calculate_estimated_hellinger_distance(qc=self.state) + hd_metric = FOMMetric(value=val, kind=kind) + + metrics = FigureOfMeritMetrics( + expected_fidelity=ef_metric, + success_probability=esp_metric, + critical_depth=cd_metric, + hellinger_distance=hd_metric, + ) + + self.tracer.record_step( + step_index=step_index, + action=action_name, + reward=reward_val, + current_qc=self.state, + figures_of_merit=metrics, + features=feature_vector, + synthesized=synthesized, + laid_out=laid_out, + routed=routed, + done=done, + ) + + if done: + out_path = Path(self.tracer_output_path) + + if out_path.is_dir() or not out_path.suffix: + out_path = out_path / f"visualization_{self.current_circuit_name}.json" + + self.tracer.save_to_json(out_path) + logger.info("βœ…TRACE EXPORTED SUCCESSFULLY to: %s", out_path.resolve()) + + def _log_step_reward( + self, + step_index: int, + action_name: str, + reward_val: float, + done: bool, + ) -> None: """Log the chosen action and resulting reward for the current episode step.""" logger.info( "Episode %d step %d: action=%s reward=%.6f", @@ -346,19 +437,35 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any altered_qc = self._apply_and_update(action) if altered_qc is None: - self._log_step_reward(step_index, action_name, 0.0, done=True) - return self._create_observation(), 0.0, True, False, {} + obs = self._create_observation() + self._log_step_reward(step_index=step_index, action_name=action_name, 
reward_val=0.0, done=True) + self._collect_tracer_data( + step_index=step_index, + action_name=action_name, + reward_val=0.0, + fom_value=0.0, + fom_kind="exact", + feature_vector=obs, + done=True, + ) + return obs, 0.0, True, False, {} done = action == self.action_terminate_index if self.reward_function == "estimated_hellinger_distance": reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0 - self._log_step_reward(step_index, action_name, reward_val, done) - return self._create_observation(), reward_val, done, False, {} - - # Lazy init: compute prev_reward only once per episode (or if missing) - if self.prev_reward is None: - self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto") + obs = self._create_observation() + self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done) + self._collect_tracer_data( + step_index=step_index, + action_name=action_name, + reward_val=reward_val, + fom_value=reward_val, + fom_kind="exact", + feature_vector=obs, + done=done, + ) + return obs, reward_val, done, False, {} if done: assert action in self.valid_actions, "Terminate action is not valid but was chosen." 
@@ -385,9 +492,157 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any self.prev_reward, self.prev_reward_kind = new_val, new_kind obs = self._create_observation() - self._log_step_reward(step_index, action_name, reward_val, done) + self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done) + self._collect_tracer_data( + step_index=step_index, + action_name=action_name, + reward_val=reward_val, + fom_value=self.prev_reward, + fom_kind=self.prev_reward_kind, + feature_vector=obs, + done=done, + ) + return obs, reward_val, done, False, {} + def _resolve_evaluation_kind(self, qc: QuantumCircuit, mode: str) -> tuple[str, Any]: + """Resolves whether to use 'exact' or 'approx' evaluation based on the circuit state.""" + reward_layout = getattr(qc, "_layout", None) + if reward_layout is None: + reward_layout = self.layout + + if mode == "exact": + return "exact", reward_layout + if mode == "approx": + return "approx", reward_layout + + # "auto" resolution + only_native = self.is_circuit_synthesized(qc) + laid_out = self.is_circuit_laid_out(qc, reward_layout) if reward_layout is not None else False + mapped = self.is_circuit_routed(qc, CouplingMap(self.device.build_coupling_map())) if laid_out else False + + kind = "exact" if (only_native and laid_out and mapped) else "approx" + return kind, reward_layout + + def _prepare_exact_qc(self, qc: QuantumCircuit, reward_layout: TranspileLayout | Layout | None) -> QuantumCircuit: + """Prepares the circuit for exact evaluation by exporting it if necessary.""" + if reward_layout is None or getattr(qc, "_layout", None) is not None: + return qc + return self.export_circuit(qc) + + def calculate_expected_fidelity(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]: + """Calculates the expected fidelity for the given quantum circuit. + + Args: + qc: + Circuit to evaluate. If ``None``, evaluates the environment's current state. 
+ mode: + Selects how the method chooses between exact and approximate evaluation: + + - ``"auto"`` (default): determines computation automatically. + - ``"exact"``: always compute the exact, calibration-aware metric. + - ``"approx"``: always compute the approximate, transpile-based proxy. + + Returns: + A tuple ``(value, kind)`` where: + - ``value`` is the expected fidelity (float). + - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used. + """ + if qc is None: + qc = self.state + + kind, reward_layout = self._resolve_evaluation_kind(qc, mode) + + if kind == "exact": + exact_qc = self._prepare_exact_qc(qc, reward_layout) + return expected_fidelity(exact_qc, self.device), "exact" + + self._ensure_device_averages_cached() + val = approx_expected_fidelity(qc, device=self.device, error_rates=self.err_by_gate) + return val, "approx" + + def calculate_estimated_success_probability( + self, qc: QuantumCircuit | None = None, mode: str = "auto" + ) -> tuple[float, str]: + """Calculates the estimated success probability (ESP) for the given quantum circuit. + + Args: + qc: + Circuit to evaluate. If ``None``, evaluates the environment's current state. + mode: + Selects how the method chooses between exact and approximate evaluation: + + - ``"auto"`` (default): determines computation automatically. + - ``"exact"``: always compute the exact, calibration-aware metric. + - ``"approx"``: always compute the approximate, transpile-based proxy. + + Returns: + A tuple ``(value, kind)`` where: + - ``value`` is the estimated success probability (float). + - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used. 
+ """ + if qc is None: + qc = self.state + + kind, reward_layout = self._resolve_evaluation_kind(qc, mode) + + if kind == "exact": + exact_qc = self._prepare_exact_qc(qc, reward_layout) + return estimated_success_probability(exact_qc, self.device), "exact" + + self._ensure_device_averages_cached() + feats = calc_supermarq_features(qc) + val = approx_estimated_success_probability( + qc, + device=self.device, + error_rates=self.err_by_gate, + gate_durations=self.dur_by_gate, + tbar=self.tbar, + par_feature=float(feats.parallelism), + liv_feature=float(feats.liveness), + n_qubits=int(qc.num_qubits), + ) + return val, "approx" + + def calculate_critical_depth(self, qc: QuantumCircuit | None = None) -> tuple[float, str]: + """Calculates the critical depth for the given quantum circuit. + + Note: + Critical depth is always computed exactly. + + Args: + qc: + Circuit to evaluate. If ``None``, evaluates the environment's current state. + + Returns: + A tuple ``(value, kind)`` where: + - ``value`` is the critical depth (float). + - ``kind`` is always ``"exact"``. + """ + if qc is None: + qc = self.state + return crit_depth(qc), "exact" + + def calculate_estimated_hellinger_distance(self, qc: QuantumCircuit | None = None) -> tuple[float, str]: + """Calculates the estimated Hellinger distance for the given quantum circuit. + + Note: + Hellinger distance is always computed exactly using the environment's + pretrained machine learning model. + + Args: + qc: + Circuit to evaluate. If ``None``, evaluates the environment's current state. + + Returns: + A tuple ``(value, kind)`` where: + - ``value`` is the estimated Hellinger distance (float). + - ``kind`` is always ``"exact"``. 
+ """ + if qc is None: + qc = self.state + return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact" + def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]: """Compute the reward for a circuit and report whether it was computed exactly or approximately. @@ -422,70 +677,24 @@ def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") if qc is None: qc = self.state - # Reward functions that are always computed exactly. - if self.reward_function not in {"expected_fidelity", "estimated_success_probability"}: - if self.reward_function == "critical_depth": - return crit_depth(qc), "exact" - if self.reward_function == "estimated_hellinger_distance": - return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact" - # Fallback for other unknown / not-yet-implemented reward functions: - logger.warning( - "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.", - self.reward_function, - ) - return 0.0, "exact" - - reward_layout = cast("TranspileLayout | Layout | None", getattr(qc, "_layout", None)) - if reward_layout is None: - # use the env layout if the circuit has no attached layout - # (e.g., if it's an intermediate state or a newly exported copy) - reward_layout = self.layout - - # Dual-path evaluation (exact vs. approximate) for EF / ESP. 
- if mode == "exact": - kind = "exact" - elif mode == "approx": - kind = "approx" - else: # "auto" - only_native = self.is_circuit_synthesized(qc) - laid_out = self.is_circuit_laid_out(qc, reward_layout) if reward_layout is not None else False - mapped = self.is_circuit_routed(qc, CouplingMap(self.device.build_coupling_map())) if laid_out else False - - kind = "exact" if (only_native and laid_out and mapped) else "approx" - - if kind == "exact": - exact_qc = ( - qc if reward_layout is None or getattr(qc, "_layout", None) is not None else self.export_circuit(qc) - ) - if self.reward_function == "expected_fidelity": - return expected_fidelity(exact_qc, self.device), "exact" - - return estimated_success_probability(exact_qc, self.device), "exact" + if self.reward_function == "expected_fidelity": + return self.calculate_expected_fidelity(qc, mode) - # Approximate metrics use per-basis-gate averages cached from device calibration - self._ensure_device_averages_cached() + if self.reward_function == "estimated_success_probability": + return self.calculate_estimated_success_probability(qc, mode) - if self.reward_function == "expected_fidelity": - val = approx_expected_fidelity( - qc, - device=self.device, - error_rates=self.err_by_gate, - ) - return val, "approx" + if self.reward_function == "critical_depth": + return self.calculate_critical_depth(qc) - feats = calc_supermarq_features(qc) + if self.reward_function == "estimated_hellinger_distance": + return self.calculate_estimated_hellinger_distance(qc) - val = approx_estimated_success_probability( - qc, - device=self.device, - error_rates=self.err_by_gate, - gate_durations=self.dur_by_gate, - tbar=self.tbar, - par_feature=float(feats.parallelism), - liv_feature=float(feats.liveness), - n_qubits=int(qc.num_qubits), + # Fallback for other unknown / not-yet-implemented reward functions: + logger.warning( + "Reward function '%s' is not supported in PredictorEnv. 
Returning 0.0 as a fallback reward.", + self.reward_function, ) - return val, "approx" + return 0.0, "exact" def render(self) -> None: """Renders the current state.""" @@ -534,15 +743,45 @@ def reset( self.valid_actions = self.determine_valid_actions_for_state() self.error_occurred = False - - self.prev_reward = None - self.prev_reward_kind = None + self.tracer = None self.num_qubits_uncompiled_circuit = self.state.num_qubits self.has_parameterized_gates = len(self.state.parameters) > 0 + + # create baseline values + obs = self._create_observation() + self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto") + logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name) - return self._create_observation(), {} + if self.tracer_output_path is not None: + logger.info("Tracing enabled for compilation...") + + if self.reward_function != "estimated_hellinger_distance": + self.hellinger_model = None + hellinger_model_path = get_hellinger_model_path(self.device) + if hellinger_model_path.is_file(): + # load the model so it can be used in _collect_tracer_data + self.hellinger_model = load(hellinger_model_path) + + self.tracer = CompilationTracer.from_initial_state( + device=self.device, + circuit_name=self.current_circuit_name, + figure_of_merit=self.reward_function, + mdp_policy=self.mdp, + ) + + self._collect_tracer_data( + step_index=0, + action_name="Baseline", + reward_val=0.0, + fom_value=self.prev_reward, + fom_kind=self.prev_reward_kind, + feature_vector=obs, + done=False, + ) + + return obs, {} def action_masks(self) -> list[bool]: """Returns a list of valid actions for the current state.""" @@ -1035,7 +1274,6 @@ def is_circuit_routed(self, circuit: QuantumCircuit, coupling_map: CouplingMap) def determine_valid_actions_for_state(self) -> list[int]: """Determine valid actions based on circuit state: synthesized, mapped, routed.""" synthesized, laid_out, routed = self._get_compilation_state_flags() - actions = 
[] # Initial state if not synthesized and not laid_out and not routed: diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py new file mode 100644 index 000000000..34374efad --- /dev/null +++ b/src/mqt/predictor/rl/tracer.py @@ -0,0 +1,299 @@ +# Copyright (c) 2023 - 2026 Chair for Design Automation, TUM +# Copyright (c) 2025 - 2026 Munich Quantum Software Company GmbH +# All rights reserved. +# +# SPDX-License-Identifier: MIT +# +# Licensed under the MIT License + +"""Visualization module for recording and exporting the RL compilation process.""" + +from __future__ import annotations + +import json +import time +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import TYPE_CHECKING + +from qiskit import qasm3 + +if TYPE_CHECKING: + import numpy as np + from numpy.typing import NDArray + from qiskit import QuantumCircuit + from qiskit.transpiler import InstructionProperties, Target + + +@dataclass +class TopologyEdge: + """Represents a topology edge between two qubits. + + Attributes: + control: The control qubit index. + target: The target qubit index. + """ + + control: int + target: int + + +@dataclass +class GateCalibration: + """Calibration data for a specific gate on a specific set of qubits. + + Attributes: + qubits: The qubits that the calibration data applies to. + duration: The instructions execution duration (in seconds) on the specified set of qubits. + error: The average error rate for the instruction on the specified set of qubits. + """ + + qubits: list[int] + duration: float | None + error: float | None + + +@dataclass +class DeviceMetadata: + """Metadata containing information about the target quantum device for compilation. + + Attributes: + description: The name of the quantum device. + device_qubits: The number of qubits available on the device. + native_gates: A set of gates native to this device. + calibration_data: The calibration data for this device per native instruction. 
+ """ + + description: str + device_qubits: int + native_gates: list[str] + topology: list[TopologyEdge] + calibration_data: dict[str, list[GateCalibration]] + + +@dataclass +class FOMMetric: + """Represents a Figure of Merit metric value. + + Attributes: + value: The figure-of-merit value. + kind: The used calculation method of the value, "exact" or "approx". + """ + + value: float + kind: str + + +@dataclass +class FigureOfMeritMetrics: + """Data containing information about various figures of merit. + + Attributes: + expected_fidelity: The expected fidelity value of the circuit. + critical_depth: The critical depth of the circuit. + hellinger_distance: The hellinger distance of the circuit, if available. + success_probability: The success probability of the circuit, if available. + """ + + expected_fidelity: FOMMetric + critical_depth: FOMMetric + hellinger_distance: FOMMetric | None + success_probability: FOMMetric | None + + +@dataclass +class CompilationStep: + """A snapshot of the circuit state and environment metrics at a single timestep. + + Attributes: + step_index: The current step number in the reinforcement learning episode. + action: The string representation of the compilation pass applied (e.g., 'OptimizeCliffords'). + reward: The calculated reward value for applying this specific action. + current_depth: The depth of the quantum circuit after the action was applied. + num_qubits: The number of qubits in the circuit. + gates_per_operation: The number of gates per operation occurring in the circuit. + total_gates: The total number of gates included in the circuit. + figures_of_merit: The figure of merit values for the current circuit. + synthesized: Whether the circuit has already been synthesized. + laid_out: Whether the circuit has already been laid out. + routed: Whether the circuit has already been routed. + is_terminal: A flag indicating if the compilation process has concluded. 
@dataclass
class CompilationTracer:
    """Aggregates compilation steps and metadata for export to a JSON file.

    This class acts as an in-memory buffer during the reinforcement learning compilation
    process. It tracks the physical transformations of the circuit and exports the
    entire episode as a structured JSON file upon termination.

    Attributes:
        circuit_name: The name of the circuit being compiled.
        figure_of_merit: The chosen figure of merit for this compilation.
        mdp_policy: The MDP transition policy.
        device: The target device metadata.
        schema_version: The version of this schema. Upgrade in case of schema changes to maintain
            compatibility with tracer frontend.
        timestamp: A timestamp indicating start of the compilation (``time.time()`` at construction).
        steps: An ordered list of CompilationStep snapshots.
    """

    circuit_name: str
    figure_of_merit: str
    mdp_policy: str
    device: DeviceMetadata
    schema_version: str = "1.0.0"
    timestamp: float = field(default_factory=time.time)
    steps: list[CompilationStep] = field(default_factory=list)

    @classmethod
    def from_initial_state(
        cls,
        device: Target,
        circuit_name: str,
        figure_of_merit: str,
        mdp_policy: str,
    ) -> CompilationTracer:
        """Alternative constructor to build the tracer more conveniently from the environment's initial state.

        Args:
            device: The target device for which compilation is performed.
            circuit_name: The name of the circuit being compiled.
            figure_of_merit: The chosen figure of merit for this compilation.
            mdp_policy: The MDP transition policy.

        Returns:
            A tracer without recorded steps whose ``device`` field holds the metadata extracted from ``device``.
        """
        return cls(
            circuit_name=circuit_name,
            figure_of_merit=figure_of_merit,
            mdp_policy=mdp_policy,
            device=cls._extract_device_metadata(device),
        )

    def record_step(
        self,
        step_index: int,
        action: str,
        reward: float,
        current_qc: QuantumCircuit,
        figures_of_merit: FigureOfMeritMetrics,
        features: dict[str, int | NDArray[np.float32]],
        synthesized: bool,
        laid_out: bool,
        routed: bool,
        done: bool,
    ) -> None:
        """Records a single compilation action and the resulting circuit state.

        Args:
            step_index: The current step number in the environment.
            action: The name of the compilation pass that was just applied.
            reward: The calculated reward for the applied pass.
            current_qc: The current Qiskit QuantumCircuit object after the pass.
            figures_of_merit: The available figures of merit for the current circuit.
            features: The quantum circuit's feature vector used by the RL agent.
            synthesized: Whether the circuit has already been synthesized.
            laid_out: Whether the circuit has already been laid out.
            routed: Whether the circuit has already been routed.
            done: Boolean indicating if this is the final step of the compilation.

        Raises:
            KeyError: If ``features`` lacks one of the keys ``program_communication``, ``critical_depth``,
                ``entanglement_ratio``, ``parallelism`` or ``liveness``.
        """
        # Barriers are excluded from the per-operation counts and therefore from the total as well.
        gate_counts: dict[str, int] = {}
        for gate_name, count in current_qc.count_ops().items():
            name = str(gate_name)
            if name != "barrier":
                gate_counts[name] = int(count)

        self.steps.append(
            CompilationStep(
                step_index=step_index,
                action=action,
                # Coerce to a built-in float first: rounding e.g. a NumPy float32 keeps the NumPy
                # type, which the json module cannot serialize in save_to_json.
                reward=round(float(reward), 6),
                current_depth=current_qc.depth(),
                num_qubits=current_qc.num_qubits,
                gates_per_operation=gate_counts,
                total_gates=sum(gate_counts.values()),
                figures_of_merit=figures_of_merit,
                is_terminal=done,
                circuit_qasm3=qasm3.dumps(current_qc),
                program_communication=self._extract_float(features["program_communication"]),
                raw_critical_depth=self._extract_float(features["critical_depth"]),
                entanglement_ratio=self._extract_float(features["entanglement_ratio"]),
                parallelism=self._extract_float(features["parallelism"]),
                liveness=self._extract_float(features["liveness"]),
                synthesized=synthesized,
                laid_out=laid_out,
                routed=routed,
            )
        )

    def save_to_json(self, filepath: str | Path) -> None:
        """Serializes the metadata and all recorded steps to a JSON file.

        The tracer is serialized completely before anything is written, so a serialization
        failure does not leave a truncated file (or freshly created directories) behind.

        Args:
            filepath: The destination path or filename for the output JSON file. Missing parent
                directories are created.
        """
        path = Path(filepath)
        serialized = json.dumps(asdict(self), indent=4)
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(serialized, encoding="utf-8")

    @staticmethod
    def _extract_device_metadata(device: Target) -> DeviceMetadata:
        """Internal helper to extract topology and calibration data from the device.

        Args:
            device: The target device to inspect.

        Returns:
            The device description, qubit count, native gate names, coupling edges (empty if the
            device builds no coupling map) and per-gate calibration entries.
        """
        native_gates = list(device.operation_names)
        cmap = device.build_coupling_map()
        topology = [TopologyEdge(control=c, target=t) for c, t in cmap] if cmap is not None else []
        calibration_data: dict[str, list[GateCalibration]] = {}

        props: InstructionProperties
        qubit_tuples: tuple[int, ...]

        for gate_name in native_gates:
            # Entries without a qubit tuple or without properties carry no calibration data and are skipped.
            calibration_data[gate_name] = [
                GateCalibration(qubits=list(qubit_tuples), duration=props.duration, error=props.error)
                for qubit_tuples, props in device[gate_name].items()
                if qubit_tuples is not None and props is not None
            ]

        return DeviceMetadata(
            description=device.description,
            device_qubits=device.num_qubits,
            native_gates=native_gates,
            topology=topology,
            calibration_data=calibration_data,
        )

    @staticmethod
    def _extract_float(val: int | float | NDArray[np.float32]) -> float:
        """Safely extracts a built-in float from a scalar or from the first entry of an array.

        Args:
            val: A Python number, a NumPy scalar, a 0-d array, or an indexable sequence/array
                whose first element is the value of interest.

        Returns:
            The value as a built-in ``float``.
        """
        if isinstance(val, (int, float)):
            return float(val)
        # NumPy scalars and 0-d arrays report ndim == 0 and cannot be indexed with [0].
        if getattr(val, "ndim", None) == 0:
            return float(val)
        return float(val[0])
+ """ + trace_file = tmp_path / "test_trace.json" + qc = get_benchmark("ghz", level=BenchmarkLevel.INDEP, circuit_size=3) + + figure_of_merit = "expected_fidelity" + device = get_device("ibm_falcon_127") + model_name = "model_" + figure_of_merit + "_" + device.description + model_path = get_path_trained_model() / (model_name + ".zip") + + if not model_path.exists(): + predictor = Predictor(figure_of_merit=figure_of_merit, device=device) + predictor.train_model(timesteps=1000, test=True) + + _compiled_qc, _compilation_info = rl_compile( + qc, device=device, figure_of_merit=figure_of_merit, tracer_output_path=str(trace_file) + ) + + assert trace_file.exists(), "Tracer JSON file was not generated." + assert trace_file.is_file(), "Tracer output path is not a valid file." + + with trace_file.open(encoding="utf-8") as f: + trace_data = json.load(f) + + assert "circuit_name" in trace_data, "Tracer JSON is missing the circuit name." + assert "mdp_policy" in trace_data, "Tracer JSON is missing the mdp policy." + assert "device" in trace_data, "Tracer JSON is missing the device information." + assert "schema_version" in trace_data, "Tracer JSON is missing the schema version." + assert "timestamp" in trace_data, "Tracer JSON is missing the timestamp." + assert "steps" in trace_data, "Tracer JSON is missing the steps array." + + assert len(trace_data["steps"]) > 1, "Tracer should record subsequent compilation steps beyond the Baseline." + assert trace_data["steps"][0]["action"] == "Baseline", "First step must be Baseline." + assert trace_data["schema_version"] == "1.0.0" + + last_step_data = trace_data["steps"][-1] + assert last_step_data.get("is_terminal") is True, "The final compilation step must be marked as terminal." + + # Verify Figures of Merit + fom_data = last_step_data.get("figures_of_merit") + assert fom_data is not None, "Figures of merit dictionary is missing from the trace step." 
+ + # always calculated ones + assert fom_data.get("expected_fidelity") is not None, "Expected fidelity failed to populate." + assert fom_data.get("critical_depth") is not None, "Critical depth fallback failed." + + # for this device ESP should be populated + assert fom_data.get("success_probability") is not None, "ESP fallback calculation failed." + assert "value" in fom_data["success_probability"], "ESP is missing its float value." + assert "kind" in fom_data["success_probability"], "ESP is missing its kind string." + + # It is valid for HD to be None (model missing) or a populated dictionary (model exists) + hd_metric = fom_data.get("hellinger_distance") + if hd_metric is not None: + assert "value" in hd_metric, "Hellinger distance is missing its float value." + assert "kind" in hd_metric, "Hellinger distance is missing its kind string." + + try: + # Initialize from JSON (throws if the structures don't match) + DeviceMetadata(**trace_data["device"]) + + # Semantically validate both the first and the last steps + CompilationStep(**trace_data["steps"][0]) + CompilationStep(**last_step_data) + + except TypeError as e: + pytest.fail( + f"Semantic Validation Failed! The generated JSON does not match your Python dataclasses. Error: {e}" + )