munich-quantum-toolkit · flowerthrower · Jun 5, 2026 · Mar 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel
 
 ### Changed
 
+- 🎨 Improve the RL state machine logic ([#677]) ([**@flowerthrower**])
 - 🐛 Support BQSKit conversion of IQM's native `r` gate ([#679]) ([**@flowerthrower**])
 - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**])
 - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**])
@@ -47,6 +48,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool
 
 <!-- PR links -->
 
+[#677]: https://github.com/munich-quantum-toolkit/predictor/pull/677
 [#679]: https://github.com/munich-quantum-toolkit/predictor/pull/679
 [#572]: https://github.com/munich-quantum-toolkit/predictor/pull/572
 [#489]: https://github.com/munich-quantum-toolkit/predictor/pull/489

diff --git a/src/mqt/predictor/reward.py b/src/mqt/predictor/reward.py
@@ -23,7 +23,6 @@
 
 if TYPE_CHECKING:
     from qiskit import QuantumCircuit
-    from qiskit.circuit import QuantumRegister, Qubit
     from qiskit.transpiler import Target
     from sklearn.ensemble import RandomForestRegressor
 
@@ -62,44 +61,22 @@ def expected_fidelity(qc: QuantumCircuit, device: Target, precision: int = 10) -
 
         if gate_type != "barrier":
             assert len(qargs) in [1, 2]
-            first_qubit_idx = calc_qubit_index(qargs, qc.qregs, 0)
+            first_qubit_idx = qc.find_bit(qargs[0]).index
 
             if len(qargs) == 1:
                 specific_fidelity = 1 - device[gate_type][first_qubit_idx,].error
             else:
-                second_qubit_idx = calc_qubit_index(qargs, qc.qregs, 1)
-                specific_fidelity = 1 - device[gate_type][first_qubit_idx, second_qubit_idx].error
-
+                second_qubit_idx = qc.find_bit(qargs[1]).index
+                try:
+                    specific_fidelity = 1 - device[gate_type][first_qubit_idx, second_qubit_idx].error
+                except KeyError:
+                    msg = f"Error rate for gate {gate_type} on qubits {first_qubit_idx} and {second_qubit_idx} not found in device properties."
+                    raise KeyError(msg) from None
             res *= specific_fidelity
 
     return float(np.round(res, precision).item())
 
 
-def calc_qubit_index(qargs: list[Qubit], qregs: list[QuantumRegister], index: int) -> int:
-    """Calculates the global qubit index for a given quantum circuit and qubit index.
-
-    Arguments:
-        qargs: The qubits of the quantum circuit.
-        qregs: The quantum registers of the quantum circuit.
-        index: The index of the qubit in the qargs list.
-
-    Returns:
-        The global qubit index of the given qubit in the quantum circuit.
-
-    Raises:
-        ValueError: If the qubit index is not found in the quantum registers.
-    """
-    offset = 0
-    for reg in qregs:
-        if qargs[index] not in reg:
-            offset += reg.size
-        else:
-            qubit_index: int = offset + reg.index(qargs[index])
-            return qubit_index
-    error_msg = f"Global qubit index for local qubit {index} index not found."
-    raise ValueError(error_msg)
-
-
 def estimated_success_probability(qc: QuantumCircuit, device: Target, precision: int = 10) -> float:
     """Calculates the estimated success probability of a given quantum circuit on a given device.
 
@@ -125,7 +102,7 @@ def estimated_success_probability(qc: QuantumCircuit, device: Target, precision:
         if gate_type == "barrier" or gate_type == "id":
             continue
         assert len(qargs) in (1, 2)
-        first_qubit_idx = calc_qubit_index(qargs, qc.qregs, 0)
+        first_qubit_idx = qc.find_bit(qargs[0]).index
         active_qubits.add(first_qubit_idx)
 
         if len(qargs) == 1:  # single-qubit gate
@@ -140,7 +117,7 @@ def estimated_success_probability(qc: QuantumCircuit, device: Target, precision:
             ))
             exec_time_per_qubit[first_qubit_idx] += duration
         else:  # multi-qubit gate
-            second_qubit_idx = calc_qubit_index(qargs, qc.qregs, 1)
+            second_qubit_idx = qc.find_bit(qargs[1]).index
             active_qubits.add(second_qubit_idx)
             duration = device[gate_type][first_qubit_idx, second_qubit_idx].duration
             op_times.append((gate_type, [first_qubit_idx, second_qubit_idx], duration, "s"))
@@ -191,7 +168,7 @@ def estimated_success_probability(qc: QuantumCircuit, device: Target, precision:
             continue
 
         assert len(qargs) in (1, 2)
-        first_qubit_idx = calc_qubit_index(qargs, qc.qregs, 0)
+        first_qubit_idx = scheduled_circ.find_bit(qargs[0]).index
 
         if len(qargs) == 1:
             if gate_type == "measure":
@@ -213,8 +190,12 @@ def estimated_success_probability(qc: QuantumCircuit, device: Target, precision:
                 continue
             res *= 1 - device[gate_type][first_qubit_idx,].error
         else:
-            second_qubit_idx = calc_qubit_index(qargs, qc.qregs, 1)
-            res *= 1 - device[gate_type][first_qubit_idx, second_qubit_idx].error
+            second_qubit_idx = scheduled_circ.find_bit(qargs[1]).index
+            try:
+                res *= 1 - device[gate_type][first_qubit_idx, second_qubit_idx].error
+            except KeyError:
+                msg = f"Error rate for gate {gate_type} on qubits {first_qubit_idx} and {second_qubit_idx} not found in device properties."
+                raise KeyError(msg) from None
 
     if qiskit_version >= "2.0.0":
         for i in range(device.num_qubits):

diff --git a/src/mqt/predictor/rl/actions.py b/src/mqt/predictor/rl/actions.py
@@ -11,7 +11,6 @@
 from __future__ import annotations
 
 import os
-import sys
 from collections import defaultdict
 from dataclasses import dataclass
 from enum import Enum
@@ -90,6 +89,7 @@
     from qiskit.passmanager.base_tasks import Task
 
     TaskList = list[Task | TketBasePass | PreProcessTKETRoutingAfterQiskitLayout]
+    from qiskit.passmanager import PropertySet
 
 
 class CompilationOrigin(str, Enum):
@@ -146,7 +146,7 @@ class DeviceDependentAction(Action):
             Callable[..., tuple[Any, ...] | Circuit],
         ]
     )
-    do_while: Callable[[dict[str, Circuit]], bool] | None = None
+    do_while: Callable[[PropertySet], bool] | None = None
 
 
 # Registry of actions
@@ -332,7 +332,7 @@ def remove_action(name: str) -> None:
             circuit,
             optimization_level=1 if os.getenv("GITHUB_ACTIONS") == "true" else 2,
             synthesis_epsilon=1e-1 if os.getenv("GITHUB_ACTIONS") == "true" else 1e-8,
-            max_synthesis_size=2 if os.getenv("GITHUB_ACTIONS") == "true" else 3,
+            max_synthesis_size=3,
             seed=10,
             num_workers=1 if os.getenv("GITHUB_ACTIONS") == "true" else -1,
         ),
@@ -431,7 +431,7 @@ def remove_action(name: str) -> None:
                 with_mapping=True,
                 optimization_level=1 if os.getenv("GITHUB_ACTIONS") == "true" else 2,
                 synthesis_epsilon=1e-1 if os.getenv("GITHUB_ACTIONS") == "true" else 1e-8,
-                max_synthesis_size=2 if os.getenv("GITHUB_ACTIONS") == "true" and sys.platform != "linux" else 3,
+                max_synthesis_size=3,
                 seed=10,
                 num_workers=1 if os.getenv("GITHUB_ACTIONS") == "true" else -1,
             )
@@ -461,7 +461,7 @@ def remove_action(name: str) -> None:
                 model=MachineModel(bqskit_circuit.num_qudits, gate_set=get_bqskit_native_gates(device)),
                 optimization_level=1 if os.getenv("GITHUB_ACTIONS") == "true" else 2,
                 synthesis_epsilon=1e-1 if os.getenv("GITHUB_ACTIONS") == "true" else 1e-8,
-                max_synthesis_size=2 if os.getenv("GITHUB_ACTIONS") == "true" and sys.platform != "linux" else 3,
+                max_synthesis_size=3,
                 seed=10,
                 num_workers=1 if os.getenv("GITHUB_ACTIONS") == "true" else -1,
             )

diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py
@@ -89,19 +89,24 @@ def train_model(
         timesteps: int = 1000,
         verbose: int = 2,
         test: bool = False,
+        seed: int | None = None,
     ) -> None:
         """Trains all models for the given reward functions and device.
 
         Arguments:
             timesteps: The number of timesteps to train the model. Defaults to 1000.
             verbose: The verbosity level. Defaults to 2.
             test: Whether to train the model for testing purposes. Defaults to False.
+            seed: The random seed to use for reproducible training. If None, training is not seeded.
+                Defaults to None.
         """
+        if seed is not None:
+            set_random_seed(seed)
         if test:
-            set_random_seed(0)  # for reproducibility
-            n_steps = 10
+            # minimum training overhead
+            n_steps = max(timesteps, 2)
             n_epochs = 1
-            batch_size = 10
+            batch_size = n_steps
             progress_bar = False
         else:
             # default PPO values
@@ -120,6 +125,7 @@ def train_model(
             n_steps=n_steps,
             batch_size=batch_size,
             n_epochs=n_epochs,
+            seed=seed,
         )
         # Training Loop: In each iteration, the agent collects n_steps steps (rollout),
         # updates the policy for n_epochs, and then repeats the process until total_timesteps steps have been taken.