Upgrade console logger and allow multiple backends

RunnersNum40 · RunnersNum40 · commit 0c65a7eef84c · 2026-02-20T18:00:15.000-08:00
diff --git a/docs/callbacks/index.md b/docs/callbacks/index.md
@@ -31,7 +31,7 @@ via the `callback` argument to `learn`.
 from jax import random as jr
 
 from lerax.algorithm import PPO
-from lerax.callback import LoggingCallback, ProgressBarCallback, TensorBoardBackend
+from lerax.callback import ConsoleBackend, LoggingCallback, TensorBoardBackend
 from lerax.env.classic_control import CartPole
 from lerax.policy import MLPActorCriticPolicy
 
@@ -41,29 +41,29 @@ env = CartPole()
 policy = MLPActorCriticPolicy(env=env, key=policy_key)
 algo = PPO()
 
-logger = LoggingCallback(TensorBoardBackend(), env=env, policy=policy)
-callbacks = [
-    ProgressBarCallback(total_timesteps=2**16, env=env, policy=policy),
-    logger,
-]
+logger = LoggingCallback(
+    [TensorBoardBackend(), ConsoleBackend(total_timesteps=2**16)],
+    env=env,
+    policy=policy,
+)
 
 policy = algo.learn(
     env,
     policy,
     total_timesteps=2**16,
     key=learn_key,
-    callback=callbacks,
+    callback=logger,
 )
 logger.close()
 ```
 
 ## Built-in callbacks
 
-- [`ProgressBarCallback`](progress_bar.md):
-  Rich-based progress bar showing iterations, elapsed/remaining time, and iterations per second.
-
 - [`LoggingCallback`](logging.md):
-  Logs training metrics (learning rate, training log entries, episode return/length EMAs) to TensorBoard, Aim, or Weights & Biases via a pluggable backend.
+  Logs training metrics (learning rate, training log entries, episode return/length EMAs) to one or more pluggable backends. Use `ConsoleBackend` for a live terminal display with a progress bar and metrics table, `TensorBoardBackend` for TensorBoard, or `WandbBackend` for Weights & Biases.
+
+- [`ProgressBarCallback`](progress_bar.md):
+  Standalone Rich progress bar callback. For most use cases, prefer `ConsoleBackend` inside `LoggingCallback`, which provides both a progress bar and a live metrics table.
 
 - `CallbackList`:
   Aggregates multiple callbacks and forwards all hooks to each one. Used automatically when you pass a list of callbacks.
diff --git a/docs/callbacks/logging.md b/docs/callbacks/logging.md
@@ -88,11 +88,18 @@ backend = WandbBackend(
 
 ### ConsoleBackend
 
-Prints metrics to the terminal using Rich. Useful for quick debugging without a logging server.
+Displays a live metrics table and progress bar in the terminal using [Rich](https://rich.readthedocs.io/).
+On each iteration, the metrics table is updated in place rather than appended to, which keeps the display compact; the progress bar is rendered below the table.
+
+When `total_timesteps` is provided, the progress bar shows completion, elapsed/remaining time, and throughput. Without it, metrics are printed as simple `key=value` lines.
 
 ```py
 from lerax.callback import ConsoleBackend
 
+# With progress bar and live metrics table
+backend = ConsoleBackend(total_timesteps=2**16)
+
+# Without progress bar (plain text output)
 backend = ConsoleBackend()
 ```
 
diff --git a/docs/callbacks/progress_bar.md b/docs/callbacks/progress_bar.md
@@ -5,6 +5,11 @@ description: Display a live Rich progress bar during training.
 
 # ProgressBarCallback
 
+!!! tip "Prefer ConsoleBackend"
+    For most use cases, prefer [`ConsoleBackend`](logging.md#consolebackend) inside
+    `LoggingCallback` instead. It provides the same progress bar plus a live
+    metrics table, and avoids needing a separate callback.
+
 `ProgressBarCallback` displays a terminal progress bar using [Rich](https://rich.readthedocs.io/).
 It shows:
 
diff --git a/examples/gym_environment.py b/examples/gym_environment.py
@@ -2,7 +2,7 @@
 from jax import random as jr
 
 from lerax.algorithm import PPO
-from lerax.callback import LoggingCallback, ProgressBarCallback, TensorBoardBackend
+from lerax.callback import ConsoleBackend, LoggingCallback, TensorBoardBackend
 from lerax.compatibility.gym import GymToLeraxEnv
 from lerax.policy import MLPActorCriticPolicy
 
@@ -12,10 +12,11 @@
 env = GymToLeraxEnv(gym_env)
 policy = MLPActorCriticPolicy(env=env, key=policy_key)
 algo = PPO(num_envs=1)  # Vectorization is not supported for Gym environments
-logger = LoggingCallback(TensorBoardBackend(), env=env, policy=policy)
-callbacks = [ProgressBarCallback(2**16), logger]
-
-policy = algo.learn(
-    env, policy, total_timesteps=2**16, key=learn_key, callback=callbacks
+logger = LoggingCallback(
+    [TensorBoardBackend(), ConsoleBackend(total_timesteps=2**16)],
+    env=env,
+    policy=policy,
 )
+
+policy = algo.learn(env, policy, total_timesteps=2**16, key=learn_key, callback=logger)
 logger.close()
diff --git a/examples/gymnax_environment.py b/examples/gymnax_environment.py
@@ -2,7 +2,7 @@
 from jax import random as jr
 
 from lerax.algorithm import PPO
-from lerax.callback import LoggingCallback, ProgressBarCallback, TensorBoardBackend
+from lerax.callback import ConsoleBackend, LoggingCallback, TensorBoardBackend
 from lerax.compatibility.gymnax import GymnaxToLeraxEnv
 from lerax.policy import MLPActorCriticPolicy
 
@@ -13,10 +13,11 @@
 
 policy = MLPActorCriticPolicy(env=env, key=policy_key)
 algo = PPO()
-logger = LoggingCallback(TensorBoardBackend(), env=env, policy=policy)
-callbacks = [ProgressBarCallback(2**16), logger]
-
-policy = algo.learn(
-    env, policy, total_timesteps=2**16, key=learn_key, callback=callbacks
+logger = LoggingCallback(
+    [TensorBoardBackend(), ConsoleBackend(total_timesteps=2**16)],
+    env=env,
+    policy=policy,
 )
+
+policy = algo.learn(env, policy, total_timesteps=2**16, key=learn_key, callback=logger)
 logger.close()
diff --git a/examples/ppo.py b/examples/ppo.py
@@ -1,7 +1,7 @@
 from jax import random as jr
 
 from lerax.algorithm import PPO
-from lerax.callback import LoggingCallback, ProgressBarCallback, WandbBackend
+from lerax.callback import ConsoleBackend, LoggingCallback, TensorBoardBackend
 from lerax.env.classic_control import CartPole
 from lerax.policy import MLPActorCriticPolicy
 
@@ -10,12 +10,15 @@
 env = CartPole()
 policy = MLPActorCriticPolicy(env=env, key=policy_key)
 algo = PPO()
+total_timesteps = 2**16
 logger = LoggingCallback(
-    WandbBackend(project="lerax"), env=env, policy=policy, video_interval=1
+    [TensorBoardBackend(), ConsoleBackend(total_timesteps=total_timesteps)],
+    env=env,
+    policy=policy,
+    video_interval=1,
 )
-callbacks = [ProgressBarCallback(2**16), logger]
 
 policy = algo.learn(
-    env, policy, total_timesteps=2**16, key=learn_key, callback=callbacks
+    env, policy, total_timesteps=total_timesteps, key=learn_key, callback=logger
 )
 logger.close()
diff --git a/src/lerax/callback/logging/callback.py b/src/lerax/callback/logging/callback.py
@@ -3,7 +3,7 @@
 import concurrent.futures
 import dataclasses
 import os
-from collections.abc import Callable
+from collections.abc import Callable, Sequence
 from datetime import datetime
 from functools import partial
 from typing import Any
@@ -205,7 +205,7 @@ def _make_video_recorder(
     video_width: int,
     video_height: int,
     video_fps: float,
-    backend: AbstractLoggingBackend,
+    backends: list[AbstractLoggingBackend],
     executor: concurrent.futures.ThreadPoolExecutor,
 ) -> Callable[..., None]:
     """
@@ -224,7 +224,7 @@ def _make_video_recorder(
         video_width: Render width in pixels.
         video_height: Render height in pixels.
         video_fps: Playback frames per second.
-        backend: Logging backend to forward video frames to.
+        backends: Logging backends to forward video frames to.
         executor: Thread pool to run the recording work in.
     """
 
@@ -349,7 +349,8 @@ def render_frame(env_state) -> np.ndarray:
             renderer.close()
 
             frames_arr = np.stack(frames).astype(np.uint8)
-            backend.log_video("eval/video", frames_arr, step, fps=video_fps)
+            for b in backends:
+                b.log_video("eval/video", frames_arr, step, fps=video_fps)
         except Exception as exc:
             import warnings
 
@@ -407,7 +408,7 @@ class LoggingCallback(AbstractCallback[EmptyCallbackState, LoggingCallbackStepSt
         alpha: EMA smoothing factor for episode statistics.
 
     Args:
-        backend: Logging backend to send metrics to.
+        backend: Logging backend, or a sequence of backends, to send metrics to.
         name: Explicit run name. When ``None``, a name is generated from the
             environment name, policy name, and a timestamp. If neither ``env``
             nor ``policy`` are provided, falls back to a plain timestamp.
@@ -423,7 +424,7 @@ class LoggingCallback(AbstractCallback[EmptyCallbackState, LoggingCallbackStepSt
         video_fps: Playback frames per second.
     """
 
-    _backend: AbstractLoggingBackend = eqx.field(static=True)
+    _backends: list[AbstractLoggingBackend] = eqx.field(static=True)
     _name: str | None = eqx.field(static=True)
     _hparams: dict[str, Any] | None = eqx.field(static=True)
     alpha: float
@@ -434,7 +435,7 @@ class LoggingCallback(AbstractCallback[EmptyCallbackState, LoggingCallbackStepSt
 
     def __init__(
         self,
-        backend: AbstractLoggingBackend,
+        backend: AbstractLoggingBackend | Sequence[AbstractLoggingBackend],
         name: str | None = None,
         env: AbstractEnvLike | None = None,
         policy: AbstractPolicy | None = None,
@@ -446,7 +447,11 @@ def __init__(
         video_height: int = 480,
         video_fps: float = 50.0,
     ) -> None:
-        self._backend = backend
+        if isinstance(backend, AbstractLoggingBackend):
+            self._backends = [backend]
+        else:
+            self._backends = list(backend)
+
         self._hparams = hparams
         self.alpha = alpha
 
@@ -461,7 +466,8 @@ def __init__(
             name = "_".join(parts)
         self._name = name
 
-        backend.open(name)
+        for b in self._backends:
+            b.open(name)
 
         if video_interval > 0:
             executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
@@ -472,7 +478,7 @@ def __init__(
                 video_width,
                 video_height,
                 video_fps,
-                backend,
+                self._backends,
                 executor,
             )
         else:
@@ -513,9 +519,8 @@ def on_iteration(
         scalars["episode/return"] = step_state.average_return.mean()
         scalars["episode/length"] = step_state.average_length.mean()
 
-        callback_with_numpy_wrapper(self._backend.log_scalars, ordered=True)(
-            scalars, last_step
-        )
+        for b in self._backends:
+            callback_with_numpy_wrapper(b.log_scalars, ordered=True)(scalars, last_step)
 
         if self._record_video_fn is not None:
             video_key, key = jr.split(key)
@@ -537,7 +542,8 @@ def on_training_start(
         )
         hparams.update(self._hparams or {})
 
-        callback_wrapper(lambda: self._backend.log_hparams(hparams), ordered=True)()
+        for b in self._backends:
+            callback_wrapper(lambda b=b: b.log_hparams(hparams), ordered=True)()
         return ctx.state
 
     def on_training_end(
@@ -548,13 +554,14 @@ def on_training_end(
     def close(self) -> None:
         """Flush pending data and release backend resources.
 
-        Call this after all ``learn()`` calls are complete. The backend
-        remains open between ``learn()`` calls so that metrics from
+        Call this after all ``learn()`` calls are complete. The backends
+        remain open between ``learn()`` calls so that metrics from
         multiple stages are logged to the same run.
         """
         if self._video_executor is not None:
             self._video_executor.shutdown(wait=True)
-        self._backend.close()
+        for b in self._backends:
+            b.close()
 
     def continue_training(
         self, ctx: IterationContext, *, key: Key[Array, ""]
diff --git a/src/lerax/callback/logging/console.py b/src/lerax/callback/logging/console.py
diff --git a/tests/integration/test_ppo_callbacks.py b/tests/integration/test_ppo_callbacks.py