diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0b81df38f..dd4d38b26 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning], with the exception that minor rel
 - ✨ Improved the MDP and extended the RL predictor's action/state space (expanded observation vector, support for stochastic passes, wrapped stochastic actions) ([#449]) ([**@Shaobo-Zhou**])
 - ✨ Added AIRouting and new optimization actions (KAKDecomposition, ElidePermutations) to the RL action set ([#449]) ([**@Shaobo-Zhou**])
 - ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**])
+- ✨ Added a `CompilationTracer` that collects compilation information and exports it to a JSON file ([#641]) ([**@linus-hologram**])
 - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**])
 - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**])
 
@@ -65,6 +66,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool
 [#393]: https://github.com/munich-quantum-toolkit/predictor/pull/393
 [#385]: https://github.com/munich-quantum-toolkit/predictor/pull/385
 [#360]: https://github.com/munich-quantum-toolkit/predictor/pull/360
+[#641]: https://github.com/munich-quantum-toolkit/predictor/pull/641
 
 <!-- Contributor -->
 
@@ -74,6 +76,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool
 [**@denialhaag**]: https://github.com/denialhaag
 [**@bachase**]: https://github.com/bachase
 [**@Shaobo-Zhou**]: https://github.com/Shaobo-Zhou
+[**@linus-hologram**]: https://github.com/linus-hologram
 
 <!-- General links -->
 
diff --git a/src/mqt/predictor/qcompile.py b/src/mqt/predictor/qcompile.py
index d65be982e..a1f8240f0 100644
--- a/src/mqt/predictor/qcompile.py
+++ b/src/mqt/predictor/qcompile.py
@@ -16,6 +16,8 @@
 from mqt.predictor.rl import rl_compile
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     from qiskit import QuantumCircuit
 
     from mqt.predictor.reward import figure_of_merit
@@ -24,16 +26,20 @@
 def qcompile(
     qc: QuantumCircuit,
     figure_of_merit: figure_of_merit = "expected_fidelity",
+    tracer_output_path: str | Path | None = None,
 ) -> tuple[QuantumCircuit, list[str], str]:
     """Compiles a given quantum circuit to a device with the highest predicted figure of merit.
 
     Arguments:
         qc: The quantum circuit to be compiled.
         figure_of_merit: The figure of merit to be used for compilation. Defaults to "expected_fidelity".
+        tracer_output_path: If provided, enables compilation tracing and exports the JSON trace to this file path or directory. Defaults to None, which disables tracing.
 
     Returns:
         A tuple containing the compiled quantum circuit, the compilation information, and the name of the device used for compilation.
     """
     predicted_device = predict_device_for_figure_of_merit(qc, figure_of_merit=figure_of_merit)
-    res = rl_compile(qc, device=predicted_device, figure_of_merit=figure_of_merit)
+    res = rl_compile(
+        qc, device=predicted_device, figure_of_merit=figure_of_merit, tracer_output_path=tracer_output_path
+    )
     return *res, predicted_device
diff --git a/src/mqt/predictor/rl/actions.py b/src/mqt/predictor/rl/actions.py
index 0e32d63d2..0e0e31be6 100644
--- a/src/mqt/predictor/rl/actions.py
+++ b/src/mqt/predictor/rl/actions.py
@@ -10,7 +10,6 @@
 
 from __future__ import annotations
 
-import os
 import sys
 import warnings
 from collections import defaultdict
@@ -18,8 +17,6 @@
 from enum import Enum
 from typing import TYPE_CHECKING
 
-from bqskit import MachineModel
-from bqskit import compile as bqskit_compile
 from pytket.architecture import Architecture
 from pytket.passes import (
     CliffordSimp,
@@ -78,7 +75,6 @@
 
 from mqt.predictor.rl.parsing import (
     PreProcessTKETRoutingAfterQiskitLayout,
-    get_bqskit_native_gates,
 )
 
 IS_WIN_PY313 = sys.platform == "win32" and sys.version_info[:2] == (3, 13)
@@ -366,7 +362,7 @@ def remove_action(name: str) -> None:
     )
 )
 
-#register_action(
+# register_action(
 #    DeviceDependentAction(
 #        "BQSKitO2",
 #        CompilationOrigin.BQSKIT,
@@ -380,7 +376,7 @@ def remove_action(name: str) -> None:
 #            num_workers=-1,
 #        ),
 #    )
-#)
+# )
 
 register_action(
     DeviceDependentAction(
@@ -526,7 +522,7 @@ def remove_action(name: str) -> None:
     )
 )
 
-#register_action(
+# register_action(
 #    DeviceDependentAction(
 #        "BQSKitMapping",
 #        CompilationOrigin.BQSKIT,
@@ -548,7 +544,7 @@ def remove_action(name: str) -> None:
 #            )
 #        ),
 #    )
-#)
+# )
 
 register_action(
     DeviceDependentAction(
@@ -561,7 +557,7 @@ def remove_action(name: str) -> None:
     )
 )
 
-#register_action(
+# register_action(
 #    DeviceDependentAction(
 #        "BQSKitSynthesis",
 #        CompilationOrigin.BQSKIT,
@@ -578,7 +574,7 @@ def remove_action(name: str) -> None:
 #            )
 #        ),
 #    )
-#)
+# )
 
 register_action(
     DeviceIndependentAction(
diff --git a/src/mqt/predictor/rl/predictor.py b/src/mqt/predictor/rl/predictor.py
index fe4b400ef..ee9321f87 100644
--- a/src/mqt/predictor/rl/predictor.py
+++ b/src/mqt/predictor/rl/predictor.py
@@ -40,6 +40,7 @@ def __init__(
         mdp: str = "paper",
         path_training_circuits: Path | None = None,
         logger_level: int = logging.INFO,
+        tracer_output_path: str | Path | None = None,
     ) -> None:
         """Initializes the Predictor object."""
         logger.setLevel(logger_level)
@@ -49,6 +50,7 @@ def __init__(
             device=device,
             mdp=mdp,
             path_training_circuits=path_training_circuits,
+            tracer_output_path=tracer_output_path,
         )
         self.device_name = device.description
         self.figure_of_merit = figure_of_merit
@@ -178,6 +180,7 @@ def rl_compile(
     device: Target | None,
     figure_of_merit: figure_of_merit | None = "expected_fidelity",
     predictor_singleton: Predictor | None = None,
+    tracer_output_path: str | Path | None = None,
 ) -> tuple[QuantumCircuit, list[str]]:
     """Compiles a given quantum circuit to a device optimizing for the given figure of merit.
 
@@ -186,6 +189,7 @@ def rl_compile(
         device: The device to compile to.
         figure_of_merit: The figure of merit to be used for compilation. Defaults to "expected_fidelity".
         predictor_singleton: A predictor object that is used for compilation to reduce compilation time when compiling multiple quantum circuits. If None, a new predictor object is created. Defaults to None.
+        tracer_output_path: If provided, enables compilation tracing and exports the JSON trace to this file path or directory. Defaults to None, which disables tracing.
 
     Returns:
         A tuple containing the compiled quantum circuit and the compilation information. If compilation fails, False is returned.
@@ -200,8 +204,14 @@ def rl_compile(
         if device is None:
             msg = "device must not be None if predictor_singleton is None."
             raise ValueError(msg)
-        predictor = Predictor(figure_of_merit=figure_of_merit, device=device)
-    else:
-        predictor = predictor_singleton
-
-    return predictor.compile_as_predicted(qc)
+        predictor = Predictor(figure_of_merit=figure_of_merit, device=device, tracer_output_path=tracer_output_path)
+        return predictor.compile_as_predicted(qc)
+
+    # use singleton and restore tracer path afterward
+    predictor = predictor_singleton
+    original_tracer_output_path = predictor.env.tracer_output_path
+    predictor.env.tracer_output_path = tracer_output_path
+    try:
+        return predictor.compile_as_predicted(qc)
+    finally:
+        predictor.env.tracer_output_path = original_tracer_output_path
diff --git a/src/mqt/predictor/rl/predictorenv.py b/src/mqt/predictor/rl/predictorenv.py
index 457a9a1ff..ecc481063 100644
--- a/src/mqt/predictor/rl/predictorenv.py
+++ b/src/mqt/predictor/rl/predictorenv.py
@@ -14,10 +14,13 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+import numpy as np
+
 if TYPE_CHECKING:
     from collections.abc import Callable
 
     from bqskit import Circuit
+    from numpy.typing import NDArray
     from pytket._tket.passes import BasePass as TketBasePass
     from pytket.circuit import Node
     from qiskit.passmanager.base_tasks import Task
@@ -34,7 +37,6 @@
 from math import isclose
 from typing import cast
 
-import numpy as np
 from bqskit.ext import bqskit_to_qiskit, qiskit_to_bqskit
 from gymnasium import Env
 from gymnasium.spaces import Box, Dict, Discrete
@@ -86,6 +88,11 @@
     postprocess_vf2postlayout,
     prepare_noise_data,
 )
+from mqt.predictor.rl.tracer import (
+    CompilationTracer,
+    FigureOfMeritMetrics,
+    FOMMetric,
+)
 from mqt.predictor.utils import calc_supermarq_features, get_openqasm_gates_for_rl
 
 logger = logging.getLogger("mqt-predictor")
@@ -131,6 +138,7 @@ def __init__(
         path_training_circuits: Path | None = None,
         reward_scale: float = 1.0,
         no_effect_penalty: float = -0.001,
+        tracer_output_path: str | Path | None = None,
     ) -> None:
         """Initializes the PredictorEnv object.
 
@@ -145,6 +153,7 @@ def __init__(
             path_training_circuits: The path to the training circuits folder. Defaults to None, which uses the default path.
             reward_scale: Scaling factor for rewards/penalties proportional to fidelity changes.
             no_effect_penalty: Step penalty applied when an action does not change the circuit (no-op).
+            tracer_output_path: File path or directory to which the compilation trace is exported as a JSON file at the end of the compilation process. If None, tracing is disabled. Defaults to None.
 
         Raises:
             ValueError: If the reward function is "estimated_success_probability" and no calibration data is available for the device or if the reward function is "estimated_hellinger_distance" and no trained model is available for the device.
@@ -163,6 +172,9 @@ def __init__(
         self.actions_structure_preserving_indices = []  # Actions that preserves the mapping and native gates
         self.used_actions: list[str] = []
         self.device = device
+        self.tracer_output_path = tracer_output_path
+        self.hellinger_model = None
+        self.tracer = None
 
         logger.info("MDP: " + mdp)
         self.mdp = mdp
@@ -245,8 +257,8 @@ def __init__(
         self.readout_err: dict[Node, float] | None = None
         self.reward_scale = reward_scale
         self.no_effect_penalty = no_effect_penalty
-        self.prev_reward: float | None = None
-        self.prev_reward_kind: str | None = None
+        self.prev_reward: float = 0.0
+        self.prev_reward_kind: str = "unknown"
         self.episode_count = 0
         self.current_circuit_name = "<unknown>"
         self.err_by_gate: dict[str, float] = {}
@@ -289,7 +301,86 @@ def export_circuit(self, qc: QuantumCircuit | None = None) -> QuantumCircuit:
         exported._layout = self.layout  # noqa: SLF001
         return exported
 
-    def _log_step_reward(self, step_index: int, action_name: str, reward_val: float, done: bool) -> None:
+    def _collect_tracer_data(
+        self,
+        step_index: int,
+        action_name: str,
+        reward_val: float,
+        fom_value: float,
+        fom_kind: str,
+        feature_vector: dict[str, int | NDArray[np.float32]],
+        done: bool,
+    ) -> None:
+        """Collects the current compilation state, records it with the tracer, and exports the trace to JSON once the episode is done."""
+        if self.tracer is not None and self.tracer_output_path is not None:
+            synthesized, laid_out, routed = self._get_compilation_state_flags()
+
+            # Collect figures of merit
+            hd_metric: FOMMetric | None = None
+            cd_metric: FOMMetric
+            ef_metric: FOMMetric
+            esp_metric: FOMMetric | None = None
+
+            if self.reward_function == "expected_fidelity":
+                ef_metric = FOMMetric(value=fom_value, kind=fom_kind)
+            else:
+                val, kind = self.calculate_expected_fidelity(qc=self.state, mode="auto")
+                ef_metric = FOMMetric(value=val, kind=kind)
+
+            if self.reward_function == "estimated_success_probability":
+                esp_metric = FOMMetric(value=fom_value, kind=fom_kind)
+            elif esp_data_available(self.device):
+                val, kind = self.calculate_estimated_success_probability(qc=self.state, mode="auto")
+                esp_metric = FOMMetric(value=val, kind=kind)
+
+            if self.reward_function == "critical_depth":
+                cd_metric = FOMMetric(value=fom_value, kind=fom_kind)
+            else:
+                val, kind = self.calculate_critical_depth(qc=self.state)
+                cd_metric = FOMMetric(value=val, kind=kind)
+
+            if self.reward_function == "estimated_hellinger_distance":
+                hd_metric = FOMMetric(value=fom_value, kind=fom_kind)
+            elif self.hellinger_model is not None:
+                val, kind = self.calculate_estimated_hellinger_distance(qc=self.state)
+                hd_metric = FOMMetric(value=val, kind=kind)
+
+            metrics = FigureOfMeritMetrics(
+                expected_fidelity=ef_metric,
+                success_probability=esp_metric,
+                critical_depth=cd_metric,
+                hellinger_distance=hd_metric,
+            )
+
+            self.tracer.record_step(
+                step_index=step_index,
+                action=action_name,
+                reward=reward_val,
+                current_qc=self.state,
+                figures_of_merit=metrics,
+                features=feature_vector,
+                synthesized=synthesized,
+                laid_out=laid_out,
+                routed=routed,
+                done=done,
+            )
+
+            if done:
+                out_path = Path(self.tracer_output_path)
+
+                if out_path.is_dir() or not out_path.suffix:
+                    out_path = out_path / f"visualization_{self.current_circuit_name}.json"
+
+                self.tracer.save_to_json(out_path)
+                logger.info("✅TRACE EXPORTED SUCCESSFULLY to: %s", out_path.resolve())
+
+    def _log_step_reward(
+        self,
+        step_index: int,
+        action_name: str,
+        reward_val: float,
+        done: bool,
+    ) -> None:
         """Log the chosen action and resulting reward for the current episode step."""
         logger.info(
             "Episode %d step %d: action=%s reward=%.6f",
@@ -346,19 +437,35 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
 
         altered_qc = self._apply_and_update(action)
         if altered_qc is None:
-            self._log_step_reward(step_index, action_name, 0.0, done=True)
-            return self._create_observation(), 0.0, True, False, {}
+            obs = self._create_observation()
+            self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=0.0, done=True)
+            self._collect_tracer_data(
+                step_index=step_index,
+                action_name=action_name,
+                reward_val=0.0,
+                fom_value=0.0,
+                fom_kind="exact",
+                feature_vector=obs,
+                done=True,
+            )
+            return obs, 0.0, True, False, {}
 
         done = action == self.action_terminate_index
 
         if self.reward_function == "estimated_hellinger_distance":
             reward_val = self.calculate_reward(mode="exact")[0] if done else 0.0
-            self._log_step_reward(step_index, action_name, reward_val, done)
-            return self._create_observation(), reward_val, done, False, {}
-
-        # Lazy init: compute prev_reward only once per episode (or if missing)
-        if self.prev_reward is None:
-            self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto")
+            obs = self._create_observation()
+            self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done)
+            self._collect_tracer_data(
+                step_index=step_index,
+                action_name=action_name,
+                reward_val=reward_val,
+                fom_value=reward_val,
+                fom_kind="exact",
+                feature_vector=obs,
+                done=done,
+            )
+            return obs, reward_val, done, False, {}
 
         if done:
             assert action in self.valid_actions, "Terminate action is not valid but was chosen."
@@ -385,9 +492,157 @@ def step(self, action: int) -> tuple[dict[str, Any], float, bool, bool, dict[Any
             self.prev_reward, self.prev_reward_kind = new_val, new_kind
 
         obs = self._create_observation()
-        self._log_step_reward(step_index, action_name, reward_val, done)
+        self._log_step_reward(step_index=step_index, action_name=action_name, reward_val=reward_val, done=done)
+        self._collect_tracer_data(
+            step_index=step_index,
+            action_name=action_name,
+            reward_val=reward_val,
+            fom_value=self.prev_reward,
+            fom_kind=self.prev_reward_kind,
+            feature_vector=obs,
+            done=done,
+        )
+
         return obs, reward_val, done, False, {}
 
+    def _resolve_evaluation_kind(self, qc: QuantumCircuit, mode: str) -> tuple[str, Any]:
+        """Resolves whether to use 'exact' or 'approx' evaluation based on the circuit state."""
+        reward_layout = getattr(qc, "_layout", None)
+        if reward_layout is None:
+            reward_layout = self.layout
+
+        if mode == "exact":
+            return "exact", reward_layout
+        if mode == "approx":
+            return "approx", reward_layout
+
+        # "auto" resolution
+        only_native = self.is_circuit_synthesized(qc)
+        laid_out = self.is_circuit_laid_out(qc, reward_layout) if reward_layout is not None else False
+        mapped = self.is_circuit_routed(qc, CouplingMap(self.device.build_coupling_map())) if laid_out else False
+
+        kind = "exact" if (only_native and laid_out and mapped) else "approx"
+        return kind, reward_layout
+
+    def _prepare_exact_qc(self, qc: QuantumCircuit, reward_layout: TranspileLayout | Layout | None) -> QuantumCircuit:
+        """Prepares the circuit for exact evaluation by exporting it if necessary."""
+        if reward_layout is None or getattr(qc, "_layout", None) is not None:
+            return qc
+        return self.export_circuit(qc)
+
+    def calculate_expected_fidelity(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]:
+        """Calculates the expected fidelity for the given quantum circuit.
+
+        Args:
+            qc:
+                Circuit to evaluate. If ``None``, evaluates the environment's current state.
+            mode:
+                Selects how the method chooses between exact and approximate evaluation:
+
+                - ``"auto"`` (default): determines computation automatically.
+                - ``"exact"``: always compute the exact, calibration-aware metric.
+                - ``"approx"``: always compute the approximate, transpile-based proxy.
+
+        Returns:
+            A tuple ``(value, kind)`` where:
+            - ``value`` is the expected fidelity (float).
+            - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used.
+        """
+        if qc is None:
+            qc = self.state
+
+        kind, reward_layout = self._resolve_evaluation_kind(qc, mode)
+
+        if kind == "exact":
+            exact_qc = self._prepare_exact_qc(qc, reward_layout)
+            return expected_fidelity(exact_qc, self.device), "exact"
+
+        self._ensure_device_averages_cached()
+        val = approx_expected_fidelity(qc, device=self.device, error_rates=self.err_by_gate)
+        return val, "approx"
+
+    def calculate_estimated_success_probability(
+        self, qc: QuantumCircuit | None = None, mode: str = "auto"
+    ) -> tuple[float, str]:
+        """Calculates the estimated success probability (ESP) for the given quantum circuit.
+
+        Args:
+            qc:
+                Circuit to evaluate. If ``None``, evaluates the environment's current state.
+            mode:
+                Selects how the method chooses between exact and approximate evaluation:
+
+                - ``"auto"`` (default): determines computation automatically.
+                - ``"exact"``: always compute the exact, calibration-aware metric.
+                - ``"approx"``: always compute the approximate, transpile-based proxy.
+
+        Returns:
+            A tuple ``(value, kind)`` where:
+            - ``value`` is the estimated success probability (float).
+            - ``kind`` is ``"exact"`` or ``"approx"`` indicating which regime was used.
+        """
+        if qc is None:
+            qc = self.state
+
+        kind, reward_layout = self._resolve_evaluation_kind(qc, mode)
+
+        if kind == "exact":
+            exact_qc = self._prepare_exact_qc(qc, reward_layout)
+            return estimated_success_probability(exact_qc, self.device), "exact"
+
+        self._ensure_device_averages_cached()
+        feats = calc_supermarq_features(qc)
+        val = approx_estimated_success_probability(
+            qc,
+            device=self.device,
+            error_rates=self.err_by_gate,
+            gate_durations=self.dur_by_gate,
+            tbar=self.tbar,
+            par_feature=float(feats.parallelism),
+            liv_feature=float(feats.liveness),
+            n_qubits=int(qc.num_qubits),
+        )
+        return val, "approx"
+
+    def calculate_critical_depth(self, qc: QuantumCircuit | None = None) -> tuple[float, str]:
+        """Calculates the critical depth for the given quantum circuit.
+
+        Note:
+            Critical depth is always computed exactly.
+
+        Args:
+            qc:
+                Circuit to evaluate. If ``None``, evaluates the environment's current state.
+
+        Returns:
+            A tuple ``(value, kind)`` where:
+            - ``value`` is the critical depth (float).
+            - ``kind`` is always ``"exact"``.
+        """
+        if qc is None:
+            qc = self.state
+        return crit_depth(qc), "exact"
+
+    def calculate_estimated_hellinger_distance(self, qc: QuantumCircuit | None = None) -> tuple[float, str]:
+        """Calculates the estimated Hellinger distance for the given quantum circuit.
+
+        Note:
+            Hellinger distance is always computed exactly using the environment's
+            pretrained machine learning model.
+
+        Args:
+            qc:
+                Circuit to evaluate. If ``None``, evaluates the environment's current state.
+
+        Returns:
+            A tuple ``(value, kind)`` where:
+            - ``value`` is the estimated Hellinger distance (float).
+            - ``kind`` is always ``"exact"``.
+        """
+        if qc is None:
+            qc = self.state
+        return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact"
+
     def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto") -> tuple[float, str]:
         """Compute the reward for a circuit and report whether it was computed exactly or approximately.
 
@@ -422,70 +677,24 @@ def calculate_reward(self, qc: QuantumCircuit | None = None, mode: str = "auto")
         if qc is None:
             qc = self.state
 
-        # Reward functions that are always computed exactly.
-        if self.reward_function not in {"expected_fidelity", "estimated_success_probability"}:
-            if self.reward_function == "critical_depth":
-                return crit_depth(qc), "exact"
-            if self.reward_function == "estimated_hellinger_distance":
-                return estimated_hellinger_distance(qc, self.device, self.hellinger_model), "exact"
-            # Fallback for other unknown / not-yet-implemented reward functions:
-            logger.warning(
-                "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.",
-                self.reward_function,
-            )
-            return 0.0, "exact"
-
-        reward_layout = cast("TranspileLayout | Layout | None", getattr(qc, "_layout", None))
-        if reward_layout is None:
-            # use the env layout if the circuit has no attached layout
-            # (e.g., if it's an intermediate state or a newly exported copy)
-            reward_layout = self.layout
-
-        # Dual-path evaluation (exact vs. approximate) for EF / ESP.
-        if mode == "exact":
-            kind = "exact"
-        elif mode == "approx":
-            kind = "approx"
-        else:  # "auto"
-            only_native = self.is_circuit_synthesized(qc)
-            laid_out = self.is_circuit_laid_out(qc, reward_layout) if reward_layout is not None else False
-            mapped = self.is_circuit_routed(qc, CouplingMap(self.device.build_coupling_map())) if laid_out else False
-
-            kind = "exact" if (only_native and laid_out and mapped) else "approx"
-
-        if kind == "exact":
-            exact_qc = (
-                qc if reward_layout is None or getattr(qc, "_layout", None) is not None else self.export_circuit(qc)
-            )
-            if self.reward_function == "expected_fidelity":
-                return expected_fidelity(exact_qc, self.device), "exact"
-
-            return estimated_success_probability(exact_qc, self.device), "exact"
+        if self.reward_function == "expected_fidelity":
+            return self.calculate_expected_fidelity(qc, mode)
 
-        # Approximate metrics use per-basis-gate averages cached from device calibration
-        self._ensure_device_averages_cached()
+        if self.reward_function == "estimated_success_probability":
+            return self.calculate_estimated_success_probability(qc, mode)
 
-        if self.reward_function == "expected_fidelity":
-            val = approx_expected_fidelity(
-                qc,
-                device=self.device,
-                error_rates=self.err_by_gate,
-            )
-            return val, "approx"
+        if self.reward_function == "critical_depth":
+            return self.calculate_critical_depth(qc)
 
-        feats = calc_supermarq_features(qc)
+        if self.reward_function == "estimated_hellinger_distance":
+            return self.calculate_estimated_hellinger_distance(qc)
 
-        val = approx_estimated_success_probability(
-            qc,
-            device=self.device,
-            error_rates=self.err_by_gate,
-            gate_durations=self.dur_by_gate,
-            tbar=self.tbar,
-            par_feature=float(feats.parallelism),
-            liv_feature=float(feats.liveness),
-            n_qubits=int(qc.num_qubits),
+        # Fallback for other unknown / not-yet-implemented reward functions:
+        logger.warning(
+            "Reward function '%s' is not supported in PredictorEnv. Returning 0.0 as a fallback reward.",
+            self.reward_function,
         )
-        return val, "approx"
+        return 0.0, "exact"
 
     def render(self) -> None:
         """Renders the current state."""
@@ -534,15 +743,45 @@ def reset(
         self.valid_actions = self.determine_valid_actions_for_state()
 
         self.error_occurred = False
-
-        self.prev_reward = None
-        self.prev_reward_kind = None
+        self.tracer = None
 
         self.num_qubits_uncompiled_circuit = self.state.num_qubits
         self.has_parameterized_gates = len(self.state.parameters) > 0
+
+        # create baseline values
+        obs = self._create_observation()
+        self.prev_reward, self.prev_reward_kind = self.calculate_reward(mode="auto")
+
         logger.info("Starting episode %d with circuit=%s", self.episode_count, self.current_circuit_name)
 
-        return self._create_observation(), {}
+        if self.tracer_output_path is not None:
+            logger.info("Tracing enabled for compilation...")
+
+            if self.reward_function != "estimated_hellinger_distance":
+                self.hellinger_model = None
+                hellinger_model_path = get_hellinger_model_path(self.device)
+                if hellinger_model_path.is_file():
+                    # load the model so it can be used in _collect_tracer_data
+                    self.hellinger_model = load(hellinger_model_path)
+
+            self.tracer = CompilationTracer.from_initial_state(
+                device=self.device,
+                circuit_name=self.current_circuit_name,
+                figure_of_merit=self.reward_function,
+                mdp_policy=self.mdp,
+            )
+
+            self._collect_tracer_data(
+                step_index=0,
+                action_name="Baseline",
+                reward_val=0.0,
+                fom_value=self.prev_reward,
+                fom_kind=self.prev_reward_kind,
+                feature_vector=obs,
+                done=False,
+            )
+
+        return obs, {}
 
     def action_masks(self) -> list[bool]:
         """Returns a list of valid actions for the current state."""
@@ -1035,7 +1274,6 @@ def is_circuit_routed(self, circuit: QuantumCircuit, coupling_map: CouplingMap)
     def determine_valid_actions_for_state(self) -> list[int]:
         """Determine valid actions based on circuit state: synthesized, mapped, routed."""
         synthesized, laid_out, routed = self._get_compilation_state_flags()
-
         actions = []
         # Initial state
         if not synthesized and not laid_out and not routed:
diff --git a/src/mqt/predictor/rl/tracer.py b/src/mqt/predictor/rl/tracer.py
new file mode 100644
index 000000000..34374efad
--- /dev/null
+++ b/src/mqt/predictor/rl/tracer.py
@@ -0,0 +1,299 @@
+# Copyright (c) 2023 - 2026 Chair for Design Automation, TUM
+# Copyright (c) 2025 - 2026 Munich Quantum Software Company GmbH
+# All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+# Licensed under the MIT License
+
+"""Visualization module for recording and exporting the RL compilation process."""
+
+from __future__ import annotations
+
+import json
+import time
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from qiskit import qasm3
+
+if TYPE_CHECKING:
+    import numpy as np
+    from numpy.typing import NDArray
+    from qiskit import QuantumCircuit
+    from qiskit.transpiler import InstructionProperties, Target
+
+
+@dataclass
+class TopologyEdge:
+    """Represents a topology edge between two qubits.
+
+    Attributes:
+        control: The control qubit index.
+        target: The target qubit index.
+    """
+
+    control: int
+    target: int
+
+
+@dataclass
+class GateCalibration:
+    """Calibration data for a specific gate on a specific set of qubits.
+
+    Attributes:
+        qubits: The qubits that the calibration data applies to.
+        duration: The instruction's execution duration (in seconds) on the specified set of qubits, if available.
+        error: The average error rate for the instruction on the specified set of qubits, if available.
+    """
+
+    qubits: list[int]
+    duration: float | None
+    error: float | None
+
+
+@dataclass
+class DeviceMetadata:
+    """Metadata containing information about the target quantum device for compilation.
+
+    Attributes:
+        description: The name of the quantum device.
+        device_qubits: The number of qubits available on the device.
+        native_gates: A list of the names of the gates native to this device.
+        calibration_data: The calibration data for this device per native instruction.
+    """
+
+    description: str
+    device_qubits: int
+    native_gates: list[str]
+    topology: list[TopologyEdge]
+    calibration_data: dict[str, list[GateCalibration]]
+
+
+@dataclass
+class FOMMetric:
+    """Represents a Figure of Merit metric value.
+
+    Attributes:
+        value: The figure-of-merit value.
+        kind: The calculation method used to obtain the value, either "exact" or "approx".
+    """
+
+    value: float
+    kind: str
+
+
+@dataclass
+class FigureOfMeritMetrics:
+    """Data containing information about various figures of merit.
+
+    Attributes:
+        expected_fidelity: The expected fidelity value of the circuit.
+        critical_depth: The critical depth of the circuit.
+        hellinger_distance: The Hellinger distance of the circuit, if available.
+        success_probability: The success probability of the circuit, if available.
+    """
+
+    expected_fidelity: FOMMetric
+    critical_depth: FOMMetric
+    hellinger_distance: FOMMetric | None
+    success_probability: FOMMetric | None
+
+
+@dataclass
+class CompilationStep:
+    """A snapshot of the circuit state and environment metrics at a single timestep.
+
+    Attributes:
+        step_index: The current step number in the reinforcement learning episode.
+        action: The string representation of the compilation pass applied (e.g., 'OptimizeCliffords').
+        reward: The calculated reward value for applying this specific action.
+        current_depth: The depth of the quantum circuit after the action was applied.
+        num_qubits: The number of qubits in the circuit.
+        gates_per_operation: The number of occurrences of each operation in the circuit, excluding barriers.
+        total_gates: The total number of gates included in the circuit, excluding barriers.
+        figures_of_merit: The figure of merit values for the current circuit.
+        synthesized: Whether the circuit has already been synthesized.
+        laid_out: Whether the circuit has already been laid out.
+        routed: Whether the circuit has already been routed.
+        is_terminal: A flag indicating if the compilation process has concluded.
+        circuit_qasm3: The structural representation of the circuit in OpenQASM 3.0 format.
+        program_communication: The program communication value for the current circuit.
+        raw_critical_depth: The raw critical depth of the circuit.
+        entanglement_ratio: The entanglement ratio for the current circuit.
+        parallelism: The parallelism value for the current circuit.
+        liveness: The liveness value for the current circuit.
+    """
+
+    step_index: int
+    action: str
+    reward: float
+    current_depth: int
+    num_qubits: int
+    gates_per_operation: dict[str, int]
+    total_gates: int
+    figures_of_merit: FigureOfMeritMetrics
+    synthesized: bool
+    laid_out: bool
+    routed: bool
+    is_terminal: bool
+    circuit_qasm3: str
+    program_communication: float
+    raw_critical_depth: float
+    entanglement_ratio: float
+    parallelism: float
+    liveness: float
+
+
+@dataclass
+class CompilationTracer:
+    """Aggregates compilation steps and metadata for export to a JSON file.
+
+    This class acts as an in-memory buffer during the reinforcement learning compilation
+    process. It tracks the physical transformations of the circuit and exports the
+    entire episode as a structured JSON file upon termination.
+
+    Attributes:
+        circuit_name: The name of the circuit being compiled.
+        figure_of_merit: The chosen figure of merit for this compilation.
+        mdp_policy: The MDP transition policy.
+        device: The target device metadata.
+        schema_version: The version of this schema. Upgrade in case of schema changes to maintain compatibility with tracer frontend.
+        timestamp: A timestamp indicating start of the compilation.
+        steps: An ordered list of CompilationStep snapshots.
+    """
+
+    circuit_name: str
+    figure_of_merit: str
+    mdp_policy: str
+    device: DeviceMetadata
+    schema_version: str = "1.0.0"
+    timestamp: float = field(default_factory=time.time)
+    steps: list[CompilationStep] = field(default_factory=list)
+
+    @classmethod
+    def from_initial_state(
+        cls,
+        device: Target,
+        circuit_name: str,
+        figure_of_merit: str,
+        mdp_policy: str,
+    ) -> CompilationTracer:
+        """Alternative constructor to build the tracer more conveniently from the environment's initial state.
+
+        Args:
+            device: The target device for which compilation is performed.
+            circuit_name: The name of the circuit being compiled.
+            figure_of_merit: The chosen figure of merit for this compilation.
+            mdp_policy: The MDP transition policy.
+        """
+        device_meta = cls._extract_device_metadata(device)
+        return cls(
+            circuit_name=circuit_name, figure_of_merit=figure_of_merit, mdp_policy=mdp_policy, device=device_meta
+        )
+
+    def record_step(
+        self,
+        step_index: int,
+        action: str,
+        reward: float,
+        current_qc: QuantumCircuit,
+        figures_of_merit: FigureOfMeritMetrics,
+        features: dict[str, int | NDArray[np.float32]],
+        synthesized: bool,
+        laid_out: bool,
+        routed: bool,
+        done: bool,
+    ) -> None:
+        """Records a single compilation action and the resulting circuit state.
+
+        Args:
+            step_index: The current step number in the environment.
+            action: The name of the compilation pass that was just applied.
+            reward: The calculated reward for the applied pass.
+            current_qc: The current Qiskit QuantumCircuit object after the pass.
+            figures_of_merit: The available figures of merit for the current circuit.
+            features: The quantum circuit's feature vector used by the RL agent.
+            synthesized: Whether the circuit has already been synthesized.
+            laid_out: Whether the circuit has already been laid out.
+            routed: Whether the circuit has already been routed.
+            done: Boolean indicating if this is the final step of the compilation.
+        """
+        present_ops_dict: dict[str, int] = {
+            str(gate_name): int(count)
+            for gate_name, count in current_qc.count_ops().items()
+            if str(gate_name) != "barrier"
+        }
+        total_gates = sum(present_ops_dict.values())
+
+        new_step = CompilationStep(
+            step_index=step_index,
+            action=action,
+            reward=round(reward, 6),
+            current_depth=current_qc.depth(),
+            num_qubits=current_qc.num_qubits,
+            gates_per_operation=present_ops_dict,
+            total_gates=total_gates,
+            figures_of_merit=figures_of_merit,
+            is_terminal=done,
+            circuit_qasm3=qasm3.dumps(current_qc),
+            program_communication=self._extract_float(features["program_communication"]),
+            raw_critical_depth=self._extract_float(features["critical_depth"]),
+            entanglement_ratio=self._extract_float(features["entanglement_ratio"]),
+            parallelism=self._extract_float(features["parallelism"]),
+            liveness=self._extract_float(features["liveness"]),
+            synthesized=synthesized,
+            laid_out=laid_out,
+            routed=routed,
+        )
+        self.steps.append(new_step)
+
+    def save_to_json(self, filepath: str | Path) -> None:
+        """Serializes the metadata and all recorded steps to a JSON file.
+
+        Args:
+            filepath: The destination path or filename for the output JSON file.
+        """
+        path = Path(filepath)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open("w", encoding="utf-8") as f:
+            json.dump(asdict(self), f, indent=4)
+
+    @staticmethod
+    def _extract_device_metadata(device: Target) -> DeviceMetadata:
+        """Internal helper to extract topology and calibration data from the device."""
+        native_gates = list(device.operation_names)
+        cmap = device.build_coupling_map()
+        topology = [TopologyEdge(control=c, target=t) for c, t in cmap] if cmap is not None else []
+        calibration_data: dict[str, list[GateCalibration]] = {}
+
+        for gate_name in native_gates:
+            gate_calibrations = []
+            props: InstructionProperties
+            qubit_tuples: tuple[int, ...]
+
+            for qubit_tuples, props in device[gate_name].items():
+                if qubit_tuples is None or props is None:
+                    continue
+
+                gate_calibrations.append(
+                    GateCalibration(qubits=list(qubit_tuples), duration=props.duration, error=props.error)
+                )
+
+            calibration_data[gate_name] = gate_calibrations
+
+        return DeviceMetadata(
+            description=device.description,
+            device_qubits=device.num_qubits,
+            native_gates=native_gates,
+            topology=topology,
+            calibration_data=calibration_data,
+        )
+
+    @staticmethod
+    def _extract_float(val: int | NDArray[np.float32]) -> float:
+        """Safely extracts a float from a scalar or a 1D NumPy array to satisfy linter requirements."""
+        if isinstance(val, int):
+            return float(val)
+        return float(val[0])
diff --git a/tests/compilation/test_tracer.py b/tests/compilation/test_tracer.py
new file mode 100644
index 000000000..6f3b49477
--- /dev/null
+++ b/tests/compilation/test_tracer.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2023 - 2026 Chair for Design Automation, TUM
+# Copyright (c) 2025 - 2026 Munich Quantum Software Company GmbH
+# All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+# Licensed under the MIT License
+
+"""Tests for the CompilationTracer."""
+
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING
+
+import pytest
+from mqt.bench import BenchmarkLevel, get_benchmark
+from mqt.bench.targets.devices import get_device
+
+from mqt.predictor.rl.helper import get_path_trained_model
+from mqt.predictor.rl.predictor import Predictor, rl_compile
+from mqt.predictor.rl.tracer import CompilationStep, DeviceMetadata
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+def test_compilation_tracer_generates_valid_json(tmp_path: Path) -> None:
+    """Test that the compilation tracer correctly generates a JSON file when a path is provided.
+
+    Args:
+        tmp_path: Pytest-provided temporary directory used for the trace output file.
+    """
+    trace_file = tmp_path / "test_trace.json"
+    qc = get_benchmark("ghz", level=BenchmarkLevel.INDEP, circuit_size=3)
+
+    figure_of_merit = "expected_fidelity"
+    device = get_device("ibm_falcon_127")
+    model_name = "model_" + figure_of_merit + "_" + device.description
+    model_path = get_path_trained_model() / (model_name + ".zip")
+
+    if not model_path.exists():
+        predictor = Predictor(figure_of_merit=figure_of_merit, device=device)
+        predictor.train_model(timesteps=1000, test=True)
+
+    _compiled_qc, _compilation_info = rl_compile(
+        qc, device=device, figure_of_merit=figure_of_merit, tracer_output_path=str(trace_file)
+    )
+
+    assert trace_file.exists(), "Tracer JSON file was not generated."
+    assert trace_file.is_file(), "Tracer output path is not a valid file."
+
+    with trace_file.open(encoding="utf-8") as f:
+        trace_data = json.load(f)
+
+    assert "circuit_name" in trace_data, "Tracer JSON is missing the circuit name."
+    assert "mdp_policy" in trace_data, "Tracer JSON is missing the mdp policy."
+    assert "device" in trace_data, "Tracer JSON is missing the device information."
+    assert "schema_version" in trace_data, "Tracer JSON is missing the schema version."
+    assert "timestamp" in trace_data, "Tracer JSON is missing the timestamp."
+    assert "steps" in trace_data, "Tracer JSON is missing the steps array."
+
+    assert len(trace_data["steps"]) > 1, "Tracer should record subsequent compilation steps beyond the Baseline."
+    assert trace_data["steps"][0]["action"] == "Baseline", "First step must be Baseline."
+    assert trace_data["schema_version"] == "1.0.0"
+
+    last_step_data = trace_data["steps"][-1]
+    assert last_step_data.get("is_terminal") is True, "The final compilation step must be marked as terminal."
+
+    # Verify Figures of Merit
+    fom_data = last_step_data.get("figures_of_merit")
+    assert fom_data is not None, "Figures of merit dictionary is missing from the trace step."
+
+    # always calculated ones
+    assert fom_data.get("expected_fidelity") is not None, "Expected fidelity failed to populate."
+    assert fom_data.get("critical_depth") is not None, "Critical depth fallback failed."
+
+    # for this device ESP should be populated
+    assert fom_data.get("success_probability") is not None, "ESP fallback calculation failed."
+    assert "value" in fom_data["success_probability"], "ESP is missing its float value."
+    assert "kind" in fom_data["success_probability"], "ESP is missing its kind string."
+
+    # It is valid for HD to be None (model missing) or a populated dictionary (model exists)
+    hd_metric = fom_data.get("hellinger_distance")
+    if hd_metric is not None:
+        assert "value" in hd_metric, "Hellinger distance is missing its float value."
+        assert "kind" in hd_metric, "Hellinger distance is missing its kind string."
+
+    try:
+        # Initialize from JSON (throws if the structures don't match)
+        DeviceMetadata(**trace_data["device"])
+
+        # Semantically validate both the first and the last steps
+        CompilationStep(**trace_data["steps"][0])
+        CompilationStep(**last_step_data)
+
+    except TypeError as e:
+        pytest.fail(
+            f"Semantic Validation Failed! The generated JSON does not match your Python dataclasses. Error: {e}"
+        )