From 89bfce15ec6a2898e6ef39e8546bbe3a3f42bec9 Mon Sep 17 00:00:00 2001
From: Andrii Pasternak <andriipasternak31@gmail.com>
Date: Thu, 11 Jun 2026 16:21:51 +0100
Subject: [PATCH 1/6] =?UTF-8?q?fix(sub-003):=20per-agent=20lock=20for=20au?=
 =?UTF-8?q?to-switch=20=E2=80=94=20serialize=20concurrent=20failures=20(#7?=
 =?UTF-8?q?99)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Concurrent subscription failures on the same agent (two chat requests, or a
chat overlapping a scheduled task) both ran handle_subscription_failure's
read→decide→assign→restart sequence with no mutual exclusion: both could pick
the same alternative, both assign_subscription_to_agent, and both fire
_restart_agent — wedging the container, duplicating the switch notification, or
tripping the #421 was_already_running ambiguity.

- Add a per-agent asyncio.Lock (lazy + event-loop-safe, mirroring
  agent_call_limiter.py rather than defaultdict(asyncio.Lock)) wrapping the
  whole switch critical section.
- Add a stale-failure guard: snapshot the subscription at handler entry and,
  under the lock, bail when it changed — so 3+ viable subscriptions don't
  cascade A→B→C (a loser whose failure was about the now-previous sub).
- Tests: concurrent 2-sub (exactly one switch), 3-sub no-cascade regression,
  distinct-agent locks. Pins the race the #606 carve-out test deferred.

Process-local lock is sufficient today (single backend process; the scheduler
delegates execution to the backend via /api/internal/execute-task rather than
calling this module in its own process). #1037 (drain-aware recreate) is split
into its own PR.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../services/subscription_auto_switch.py      | 141 ++++++++---
 ...st_subscription_auto_switch_concurrency.py | 231 ++++++++++++++++++
 2 files changed, 337 insertions(+), 35 deletions(-)
 create mode 100644 tests/unit/test_subscription_auto_switch_concurrency.py

diff --git a/src/backend/services/subscription_auto_switch.py b/src/backend/services/subscription_auto_switch.py
index 6707a7a5..7ab05767 100644
--- a/src/backend/services/subscription_auto_switch.py
+++ b/src/backend/services/subscription_auto_switch.py
@@ -20,6 +20,7 @@
 tests pinning that contract.
 """
 
+import asyncio
 import logging
 from typing import Optional
 
@@ -29,6 +30,55 @@
 logger = logging.getLogger(__name__)
 
 
+# ---------------------------------------------------------------------------
+# Per-agent switch lock (#799)
+# ---------------------------------------------------------------------------
+#
+# Concurrent subscription failures on the SAME agent (two chat requests, or a
+# chat overlapping a scheduled task) both enter `handle_subscription_failure`
+# and, without mutual exclusion, both pick the same alternative, both
+# `assign_subscription_to_agent`, and both fire `_restart_agent` — the second
+# `container_stop` racing the first `start_agent_internal` wedges the container,
+# duplicates the switch notification, or trips the #421 `was_already_running`
+# ambiguity. A per-agent lock serializes the read→decide→assign→restart window.
+#
+# Mirrors the event-loop-safe lazy pattern in `services/agent_call_limiter.py`
+# (module dict + a lazily-created guard) rather than `defaultdict(asyncio.Lock)`:
+# a defaultdict binds each lock to whatever event loop is current at first key
+# access and persists it, which breaks across pytest's per-test loops
+# ("Future attached to a different loop"). Creating the locks lazily on the
+# running loop avoids that.
+#
+# INVARIANT: process-local. Correct only while (a) the backend runs a single
+# process and (b) the scheduler delegates execution to the backend via
+# `/api/internal/execute-task` rather than calling this module in its own
+# process — both true today. If the backend ever runs multiple workers, escalate
+# to a Redis `SETNX` lock keyed `auto_switch:{agent_name}` (TTL ≥ longest
+# plausible container start, ~60s).
+_AGENT_SWITCH_LOCKS: dict[str, asyncio.Lock] = {}
+_AGENT_SWITCH_LOCKS_GUARD: Optional[asyncio.Lock] = None
+
+
+async def agent_switch_lock(agent_name: str) -> asyncio.Lock:
+    """Return the per-agent switch lock, creating it lazily on the running loop."""
+    global _AGENT_SWITCH_LOCKS_GUARD
+    if _AGENT_SWITCH_LOCKS_GUARD is None:
+        _AGENT_SWITCH_LOCKS_GUARD = asyncio.Lock()
+    lock = _AGENT_SWITCH_LOCKS.get(agent_name)
+    if lock is None:
+        async with _AGENT_SWITCH_LOCKS_GUARD:
+            lock = _AGENT_SWITCH_LOCKS.setdefault(agent_name, asyncio.Lock())
+    return lock
+
+
+def _reset_locks_for_test() -> None:
+    """Test hook: drop all per-agent locks + the guard so each test's event loop
+    starts clean (locks are loop-bound)."""
+    global _AGENT_SWITCH_LOCKS_GUARD
+    _AGENT_SWITCH_LOCKS.clear()
+    _AGENT_SWITCH_LOCKS_GUARD = None
+
+
 # Substrings that indicate an auth-class subscription failure. Mirrors the
 # scheduler's classification at `src/scheduler/service.py` (which now imports
 # this same list to keep the two surfaces from drifting).
@@ -101,50 +151,71 @@ async def handle_subscription_failure(
     Returns:
         dict with switch details if auto-switch occurred, None otherwise.
     """
-    # 1. Check if auto-switch is enabled (default: on, #441)
+    # 1. Check if auto-switch is enabled (default: on, #441). Cheap, lock-free —
+    # a disabled platform never contends for the per-agent lock.
     enabled = db.get_setting_value("auto_switch_subscriptions", default="true") == "true"
     if not enabled:
         return None
 
-    # 2. Check if agent has a subscription assigned
-    current_sub_id = db.get_agent_subscription_id(agent_name)
-    if not current_sub_id:
+    # 2. Snapshot the agent's subscription BEFORE acquiring the lock. This is the
+    # subscription our failure was (approximately) about. If a concurrent failure
+    # switches the agent off it while we wait for the lock, our failure is stale.
+    sub_at_entry = db.get_agent_subscription_id(agent_name)
+    if not sub_at_entry:
         return None
 
-    # 3. Record the failure event in the rate-limit table. Auth-class events
-    # share the same table — the table tracks "subscription failure events"
-    # generically; `is_subscription_rate_limited` treats any event in the 2h
-    # window as a reason to skip the subscription as a candidate, which is the
-    # behavior we want for both kinds of failure.
-    consecutive_count = db.record_rate_limit_event(
-        agent_name=agent_name,
-        subscription_id=current_sub_id,
-        error_message=error_message,
-    )
-
-    # 4. Find a viable alternative subscription
-    alternative = db.select_best_alternative_subscription(current_sub_id)
-    if not alternative:
-        logger.warning(
-            f"[SUB-003] Agent '{agent_name}' hit a {failure_kind} failure on "
-            f"subscription {current_sub_id} (event #{consecutive_count}) "
-            f"but no viable alternative subscription is available"
+    # #799: serialize the read→decide→assign→restart window per agent so two
+    # concurrent failures on the same agent can't both switch + restart it.
+    async with await agent_switch_lock(agent_name):
+        # Re-read under the lock. If another coroutine already switched the agent
+        # off `sub_at_entry`, this failure is stale — return rather than switch
+        # again. This is what makes the fix correct for 3+ subscriptions: without
+        # it, a loser whose failure was about sub-A would attribute it to the new
+        # current sub-B and cascade A→B→C (#799 / Codex C8).
+        current_sub_id = db.get_agent_subscription_id(agent_name)
+        if current_sub_id != sub_at_entry:
+            logger.info(
+                f"[SUB-003] Agent '{agent_name}' already switched off subscription "
+                f"{sub_at_entry} (now {current_sub_id}) before this {failure_kind} "
+                f"failure acquired the lock — stale failure, skipping"
+            )
+            return None
+
+        # 3. Record the failure event in the rate-limit table. Auth-class events
+        # share the same table — the table tracks "subscription failure events"
+        # generically; `is_subscription_rate_limited` treats any event in the 2h
+        # window as a reason to skip the subscription as a candidate, which is the
+        # behavior we want for both kinds of failure.
+        consecutive_count = db.record_rate_limit_event(
+            agent_name=agent_name,
+            subscription_id=current_sub_id,
+            error_message=error_message,
         )
-        return None
 
-    # Get current subscription name for logging / notification
-    current_sub = db.get_subscription(current_sub_id)
-    old_name = current_sub.name if current_sub else current_sub_id
+        # 4. Find a viable alternative subscription
+        alternative = db.select_best_alternative_subscription(current_sub_id)
+        if not alternative:
+            logger.warning(
+                f"[SUB-003] Agent '{agent_name}' hit a {failure_kind} failure on "
+                f"subscription {current_sub_id} (event #{consecutive_count}) "
+                f"but no viable alternative subscription is available"
+            )
+            return None
 
-    # 5. Perform the switch
-    return await _perform_auto_switch(
-        agent_name=agent_name,
-        old_subscription_id=current_sub_id,
-        old_subscription_name=old_name,
-        new_subscription=alternative,
-        failure_kind=failure_kind,
-        event_count=consecutive_count,
-    )
+        # Get current subscription name for logging / notification
+        current_sub = db.get_subscription(current_sub_id)
+        old_name = current_sub.name if current_sub else current_sub_id
+
+        # 5. Perform the switch (still under the lock — the assign + restart must
+        # not interleave with a concurrent switch for this agent).
+        return await _perform_auto_switch(
+            agent_name=agent_name,
+            old_subscription_id=current_sub_id,
+            old_subscription_name=old_name,
+            new_subscription=alternative,
+            failure_kind=failure_kind,
+            event_count=consecutive_count,
+        )
 
 
 async def handle_rate_limit_error(
diff --git a/tests/unit/test_subscription_auto_switch_concurrency.py b/tests/unit/test_subscription_auto_switch_concurrency.py
new file mode 100644
index 00000000..24f50932
--- /dev/null
+++ b/tests/unit/test_subscription_auto_switch_concurrency.py
@@ -0,0 +1,231 @@
+"""Concurrency tests for SUB-003 auto-switch (#799).
+
+Pins the per-agent switch lock + stale-failure guard added in
+``services/subscription_auto_switch.py``. These are the race the carve-out
+test ``test_subscription_auto_switch_no_cred_import.py`` explicitly left out
+of scope ("Explicit non-claim: this is NOT a concurrency test").
+
+Bug being fixed: two subscription failures on the SAME agent (two chat
+requests, or a chat overlapping a scheduled task) both ran the
+read→decide→assign→restart sequence with no mutual exclusion, so both could
+pick the same alternative, both ``assign_subscription_to_agent``, and both
+fire ``_restart_agent`` — wedging the container / duplicating the switch.
+
+What's pinned here:
+  - Part A: ``handle_subscription_failure`` is serialized per agent by
+    ``agent_switch_lock``, so concurrent failures yield exactly ONE switch.
+  - Part B (Codex C8): the loser snapshots the subscription at entry and,
+    after taking the lock, bails when it changed — so 3+ viable
+    subscriptions do NOT cascade A→B→C.
+
+Determinism: rather than rely on asyncio interleaving (``asyncio.Lock.acquire``
+does not yield on a free lock, so a naive gather lets the winner switch before
+the loser snapshots), the tests PRE-HOLD the agent lock. Both coroutines then
+park at ``async with lock`` having already snapshotted the old sub; releasing
+the lock lets exactly one win and the other observe the post-switch state.
+
+Modules under test:
+    src/backend/services/subscription_auto_switch.py::handle_subscription_failure
+    src/backend/services/subscription_auto_switch.py::agent_switch_lock
+"""
+
+from __future__ import annotations
+
+import asyncio
+import importlib
+import importlib.util
+import os
+import sys
+import types
+from unittest.mock import MagicMock
+
+import pytest
+
+
+_BACKEND = os.path.abspath(
+    os.path.join(os.path.dirname(__file__), "..", "..", "src", "backend")
+)
+
+# Slots this file stubs/loads at import time. The autouse snapshot/restore
+# fixture (precedent: test_subscription_auto_switch_no_cred_import.py) bounds
+# the blast radius so these don't leak into other files sharing the session.
+_STUBBED_MODULE_NAMES = [
+    "services",
+    "database",
+    "db_models",
+    "services.subscription_auto_switch",
+]
+
+
+@pytest.fixture(autouse=True)
+def _restore_sys_modules():
+    saved = {name: sys.modules.get(name) for name in _STUBBED_MODULE_NAMES}
+    try:
+        yield
+    finally:
+        for name, value in saved.items():
+            if value is None:
+                sys.modules.pop(name, None)
+            else:
+                sys.modules[name] = value
+
+
+def _load_auto_switch(mock_db):
+    """Load ``subscription_auto_switch`` in isolation with ``database`` stubbed.
+
+    The module's top-level imports are only ``asyncio``/``logging``/``typing``
+    + ``from database import db`` + ``from db_models import NotificationCreate``
+    (the ``services.*`` imports are lazy, inside ``_restart_agent`` /
+    ``_perform_auto_switch``), so loading it by file location under its real
+    dotted name needs no real ``services`` package boot.
+    """
+    sys.modules.setdefault("services", types.ModuleType("services"))
+
+    db_module = types.ModuleType("database")
+    db_module.db = mock_db
+    sys.modules["database"] = db_module
+
+    # db_models is pure Pydantic — import the real one rather than stubbing
+    # (a bare stub would persist and break unrelated tests).
+    if "db_models" not in sys.modules:
+        if _BACKEND not in sys.path:
+            sys.path.insert(0, _BACKEND)
+        import db_models  # noqa: F401 — registers in sys.modules
+
+    sys.modules.pop("services.subscription_auto_switch", None)
+    spec = importlib.util.spec_from_file_location(
+        "services.subscription_auto_switch",
+        os.path.join(_BACKEND, "services", "subscription_auto_switch.py"),
+    )
+    mod = importlib.util.module_from_spec(spec)
+    sys.modules["services.subscription_auto_switch"] = mod
+    spec.loader.exec_module(mod)
+    return mod
+
+
+def _sub(sub_id: str) -> MagicMock:
+    """A subscription stand-in with ``.id`` / ``.name`` (``.name`` must be set
+    after construction — ``MagicMock(name=...)`` is the repr name, not a field)."""
+    s = MagicMock()
+    s.id = sub_id
+    s.name = f"sub-{sub_id}"
+    return s
+
+
+def _build_db(initial_sub: str, alt_map: dict) -> MagicMock:
+    """A ``db`` stub whose current subscription is mutable.
+
+    ``alt_map`` maps current-sub-id → the subscription
+    ``select_best_alternative_subscription`` returns for it (or absent → None).
+    """
+    state = {"current": initial_sub}
+    db = MagicMock()
+    db.get_setting_value.return_value = "true"
+    db.get_agent_subscription_id.side_effect = lambda _agent: state["current"]
+    db.record_rate_limit_event.return_value = 1
+    db.get_subscription.side_effect = lambda sub_id: _sub(sub_id)
+    db.select_best_alternative_subscription.side_effect = (
+        lambda cur: alt_map.get(cur)
+    )
+    db._state = state  # expose for assertions / the perform spy
+    return db
+
+
+def _install_perform_spy(mod, db):
+    """Replace ``_perform_auto_switch`` with a spy that simulates the assign
+    (mutates the stub's current sub) and records each switch."""
+    calls: list[str] = []
+
+    async def spy(*, agent_name, old_subscription_id, old_subscription_name,
+                  new_subscription, failure_kind, event_count):
+        calls.append(new_subscription.id)
+        db._state["current"] = new_subscription.id  # the real assign side effect
+        return {
+            "switched": True,
+            "agent_name": agent_name,
+            "old_subscription": old_subscription_name,
+            "new_subscription": new_subscription.name,
+        }
+
+    mod._perform_auto_switch = spy
+    return calls
+
+
+@pytest.mark.asyncio
+async def test_concurrent_failures_switch_exactly_once_two_subs():
+    """Two concurrent failures on one agent (A→B viable) → exactly ONE switch;
+    the loser returns None."""
+    db = _build_db("A", alt_map={"A": _sub("B")})
+    mod = _load_auto_switch(db)
+    mod._reset_locks_for_test()
+    calls = _install_perform_spy(mod, db)
+
+    # Pre-hold the agent lock so both coroutines snapshot "A" and park at
+    # `async with lock` before either can switch.
+    lock = await mod.agent_switch_lock("burst-agent")
+    await lock.acquire()
+
+    t1 = asyncio.create_task(mod.handle_subscription_failure("burst-agent", "429", "rate_limit"))
+    t2 = asyncio.create_task(mod.handle_subscription_failure("burst-agent", "429", "rate_limit"))
+    await asyncio.sleep(0)  # let both reach `await lock.acquire()`
+
+    lock.release()
+    results = await asyncio.gather(t1, t2)
+
+    assert calls == ["B"], f"expected exactly one switch to B, got {calls!r}"
+    assert db._state["current"] == "B"
+    switched = [r for r in results if r is not None]
+    skipped = [r for r in results if r is None]
+    assert len(switched) == 1 and len(skipped) == 1, results
+    assert db.assign_subscription_to_agent.call_count == 0  # spy stands in
+
+
+@pytest.mark.asyncio
+async def test_concurrent_failures_no_cascade_three_subs():
+    """C8 regression (CRITICAL): with A,B,C all viable, two concurrent failures
+    that both observed sub-A must NOT cascade A→B→C.
+
+    Without the stale-failure guard, the loser would read current=B (after the
+    winner's A→B switch) and switch B→C. The guard makes it a no-op: it never
+    even reaches alternative selection for B.
+    """
+    db = _build_db("A", alt_map={"A": _sub("B"), "B": _sub("C")})
+    mod = _load_auto_switch(db)
+    mod._reset_locks_for_test()
+    calls = _install_perform_spy(mod, db)
+
+    lock = await mod.agent_switch_lock("burst-agent")
+    await lock.acquire()
+
+    t1 = asyncio.create_task(mod.handle_subscription_failure("burst-agent", "429", "rate_limit"))
+    t2 = asyncio.create_task(mod.handle_subscription_failure("burst-agent", "429", "rate_limit"))
+    await asyncio.sleep(0)
+
+    lock.release()
+    results = await asyncio.gather(t1, t2)
+
+    assert calls == ["B"], f"expected ONLY A→B, got cascade {calls!r}"
+    assert db._state["current"] == "B"
+    assert len([r for r in results if r is None]) == 1, "loser must no-op"
+    # The guard short-circuits BEFORE alternative selection for the new sub:
+    selected = [c.args[0] for c in db.select_best_alternative_subscription.call_args_list]
+    assert "B" not in selected, (
+        f"loser reached alternative selection for the post-switch sub — the "
+        f"stale-failure guard did not fire (selected={selected!r})"
+    )
+
+
+@pytest.mark.asyncio
+async def test_different_agents_use_distinct_locks():
+    """Distinct agents get distinct lock objects (so they don't serialize);
+    the same agent always gets the same lock."""
+    db = _build_db("A", alt_map={})
+    mod = _load_auto_switch(db)
+    mod._reset_locks_for_test()
+
+    lock_a1 = await mod.agent_switch_lock("agent-a")
+    lock_a2 = await mod.agent_switch_lock("agent-a")
+    lock_b = await mod.agent_switch_lock("agent-b")
+
+    assert lock_a1 is lock_a2, "same agent must reuse its lock"
+    assert lock_a1 is not lock_b, "different agents must not share a lock"

From 146e244d3bbe3f408b02deb4c3e90c755b0892b3 Mon Sep 17 00:00:00 2001
From: Andrii Pasternak <andriipasternak31@gmail.com>
Date: Sat, 13 Jun 2026 14:36:04 +0100
Subject: [PATCH 2/6] feat: rotate subscription credentials via hot-reload, not
 container recreate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rotating an agent's subscription token recreated the container, making
"rotate a credential" and "kill every in-flight turn" the same operation
(#1037 — one 429 on a shared subscription auto-switched and destroyed every
parallel execution). Token rotation now hot-reloads the running container;
recreate is reserved for image/template/auth-mode changes (TARGET_ARCHITECTURE
§Agent Runtime).

- Agent server: POST /api/credentials/reload-token mutates the process
  CLAUDE_CODE_OAUTH_TOKEN env so the NEXT claude subprocess uses the rotated
  token while in-flight subprocesses finish on the old one (single uvicorn
  worker; Claude authenticates purely from the env var). Does not rewrite
  .env/.mcp.json.
- Durable override: token persisted to /var/lib/trinity/oauth-token (0600,
  writable layer, NOT the /home/developer volume); startup.sh exports it before
  launching the agent server so a plain fleet restart (raw stop+start, bypasses
  start_agent_internal) keeps the rotated token. Self-reconciling — wiped on
  recreate, so a DB-driven recreate re-bakes Config.Env from the DB token.
- Three producer paths converted under the #799 agent_switch_lock: auto-switch
  (SUB-003), manual sub->sub reassignment (auth-mode changes still recreate),
  and key-rollover fan-out on the POST /api/subscriptions upsert (best-effort).
- Back-compat: old base images return 404 -> fall back to _restart_agent
  (today's behavior, no regression; per-agent adoption on next recreate).

Builds on #799 (per-agent agent_switch_lock). 19 new unit tests.

Closes #1089

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docker/base-image/Dockerfile                  |   7 +
 docker/base-image/agent_server/models.py      |  17 ++
 .../agent_server/routers/credentials.py       |  48 ++++
 docker/base-image/startup.sh                  |  14 +
 docs/memory/architecture.md                   |  11 +-
 docs/memory/feature-flows.md                  |   1 +
 .../feature-flows/subscription-auto-switch.md |  25 +-
 docs/memory/requirements.md                   |  27 ++
 docs/planning/TARGET_ARCHITECTURE.md          |   2 +-
 src/backend/routers/subscriptions.py          | 115 +++++---
 .../services/subscription_auto_switch.py      | 102 +++++++-
 tests/unit/test_reload_token_endpoint.py      | 121 +++++++++
 .../test_subscription_auto_switch_pingpong.py | 246 ++++++++++++++++++
 .../test_subscription_reassign_hotreload.py   | 241 +++++++++++++++++
 14 files changed, 933 insertions(+), 44 deletions(-)
 create mode 100644 tests/unit/test_reload_token_endpoint.py
 create mode 100644 tests/unit/test_subscription_reassign_hotreload.py

diff --git a/docker/base-image/Dockerfile b/docker/base-image/Dockerfile
index 98ce515c..2056be45 100644
--- a/docker/base-image/Dockerfile
+++ b/docker/base-image/Dockerfile
@@ -130,6 +130,13 @@ RUN mkdir -p /workspace /data /logs && \
 RUN mkdir -p /tmp/secure && \
     chmod 1777 /tmp/secure
 
+# #1089: writable-layer dir for the subscription-token hot-reload override
+# (/var/lib/trinity/oauth-token). Owned by the agent (UID 1000) so the
+# agent-server process can write it; on the writable layer (NOT /home/developer)
+# so it survives a plain stop+start but is wiped on recreate.
+RUN mkdir -p /var/lib/trinity && \
+    chown developer:developer /var/lib/trinity
+
 USER developer
 
 CMD ["/app/startup.sh"]
diff --git a/docker/base-image/agent_server/models.py b/docker/base-image/agent_server/models.py
index 4fa19d96..c570eca2 100644
--- a/docker/base-image/agent_server/models.py
+++ b/docker/base-image/agent_server/models.py
@@ -261,3 +261,20 @@ class CredentialInjectResponse(BaseModel):
     """Response from credential injection"""
     status: str  # "success"
     files_written: List[str]
+
+
+class TokenReloadRequest(BaseModel):
+    """Request to hot-reload the subscription OAuth token (#1089).
+
+    Surgical alternative to a container recreate: mutates the agent-server
+    process env so the NEXT claude subprocess uses the rotated token while
+    in-flight turns keep their already-inherited old token and finish.
+    """
+    token: str  # CLAUDE_CODE_OAUTH_TOKEN value to apply
+    remove_api_key: bool = False  # also drop ANTHROPIC_API_KEY from env
+
+
+class TokenReloadResponse(BaseModel):
+    """Response from a subscription token hot-reload"""
+    status: str  # "success"
+    reloaded: bool
diff --git a/docker/base-image/agent_server/routers/credentials.py b/docker/base-image/agent_server/routers/credentials.py
index cb0a40db..da00e18f 100644
--- a/docker/base-image/agent_server/routers/credentials.py
+++ b/docker/base-image/agent_server/routers/credentials.py
@@ -15,6 +15,8 @@
     CredentialReadResponse,
     CredentialInjectRequest,
     CredentialInjectResponse,
+    TokenReloadRequest,
+    TokenReloadResponse,
 )
 from ..state import agent_state
 from ..services.trinity_mcp import inject_trinity_mcp_if_configured
@@ -132,6 +134,52 @@ async def update_credentials(request: CredentialUpdateRequest):
         raise HTTPException(status_code=500, detail=f"Credential update failed: {str(e)}")
 
 
+# Writable-layer override path (#1089). Deliberately NOT under /home/developer —
+# that path is the persistent agent-{name}-workspace volume which
+# `recreate_container_with_updated_config` preserves, so a token written there
+# would survive a recreate and shadow the freshly-baked Config.Env (DB token).
+# The writable layer instead survives a plain stop+start (same container) but is
+# wiped on recreate (new container, fresh layer) — self-reconciling by Docker
+# semantics, no marker logic needed. The directory is created + chowned to UID
+# 1000 in the base-image Dockerfile (before the USER switch).
+_TOKEN_OVERRIDE = Path("/var/lib/trinity/oauth-token")
+
+
+@router.post("/api/credentials/reload-token", response_model=TokenReloadResponse)
+async def reload_subscription_token(request: TokenReloadRequest):
+    """Hot-reload CLAUDE_CODE_OAUTH_TOKEN for the NEXT claude subprocess (#1089).
+
+    Mutates the agent-server process env so the next `subprocess.Popen` for
+    `claude` inherits the rotated token; in-flight subprocesses keep their
+    already-inherited old token and finish. Also persists the token to the
+    writable-layer override so it survives a plain stop+start (fleet restart
+    bypasses `start_agent_internal`, which would otherwise revert to the old
+    Config.Env token — F2).
+
+    Deliberately does NOT rewrite .env / .mcp.json or re-inject Trinity MCP: the
+    subscription token is not a .env credential, and the `/update` / `/inject`
+    endpoints destructively rewrite whole files.
+    """
+    if not request.token:
+        raise HTTPException(status_code=400, detail="token is required")
+
+    os.environ["CLAUDE_CODE_OAUTH_TOKEN"] = request.token
+    if request.remove_api_key:
+        os.environ.pop("ANTHROPIC_API_KEY", None)
+
+    # Persist to the writable-layer override (0600). Parent dir is created +
+    # chowned in the Dockerfile, so the agent (UID 1000) can write here.
+    _TOKEN_OVERRIDE.write_text(request.token)
+    _TOKEN_OVERRIDE.chmod(0o600)
+
+    # Add the new token to the log-redaction set (drops the old exact-match
+    # value; OAuth tokens stay caught by the sk-ant value regex regardless).
+    refresh_credential_values()
+
+    logger.info("Hot-reloaded CLAUDE_CODE_OAUTH_TOKEN (next subprocess; in-flight turns unaffected)")
+    return TokenReloadResponse(status="success", reloaded=True)
+
+
 @router.get("/api/credentials/status")
 async def get_credentials_status():
     """
diff --git a/docker/base-image/startup.sh b/docker/base-image/startup.sh
index 59d0f7a0..4f566159 100644
--- a/docker/base-image/startup.sh
+++ b/docker/base-image/startup.sh
@@ -343,6 +343,20 @@ if [ -d "/config/mcp-servers" ]; then
     done
 fi
 
+# === Rotated subscription token: durable override (#1089) ===
+# A hot-reload (POST /api/credentials/reload-token) persists the rotated
+# CLAUDE_CODE_OAUTH_TOKEN to this writable-layer path so it survives a plain
+# stop+start. The container's baked Config.Env still holds the OLD token and a
+# fleet restart (ops.py) does a raw stop+start that bypasses start_agent_internal
+# — so export the override (when present and non-empty) BEFORE launching the
+# agent server, so the rotated token wins. The file is wiped on recreate (fresh
+# writable layer), so a DB-driven recreate cleanly reverts to the freshly-baked
+# Config.Env token — no marker logic needed.
+if [ -s /var/lib/trinity/oauth-token ]; then
+    export CLAUDE_CODE_OAUTH_TOKEN="$(cat /var/lib/trinity/oauth-token)"
+    echo "Applied rotated subscription token from durable override"
+fi
+
 # Start Agent Web Server (self-contained UI)
 if [ "${ENABLE_AGENT_UI}" = "true" ]; then
     echo "Starting Agent Web UI on port ${AGENT_SERVER_PORT:-8000}..."
diff --git a/docs/memory/architecture.md b/docs/memory/architecture.md
index cd7f5d04..6c973042 100644
--- a/docs/memory/architecture.md
+++ b/docs/memory/architecture.md
@@ -276,12 +276,15 @@ Vector 0.43.1 (`timberio/vector:0.43.1-alpine`). Captures all container stdout/s
 **Internal server** `agent-server.py` (FastAPI, port 8000):
 - `/api/chat` - Claude Code execution (messages persisted to database)
 - `/health` - Health check. Returns `{status}` plus `active_tasks` (concurrent executions across `/api/chat` + `/api/task`), `last_task_at`, `consecutive_failures` (reset on success — consumed by the dispatch breaker #526 and fleet health #307) and the #333 `diagnostics` gauges (#1020). `mailbox_depth` intentionally NOT emitted — no agent-side mailbox until the actor model (#945); the backend derives queue depth from `CapacityManager`. Counters live in `agent_server/state.py`; backend reads them in `monitoring_service.py` with graceful defaults for older images.
-- `/api/credentials/update` - Hot-reload credentials
+- `/api/credentials/update` - Hot-reload credentials (rewrites `.env`/`.mcp.json`)
+- `/api/credentials/reload-token` - Surgical subscription-token hot-reload (#1089): mutates the agent-server process `os.environ["CLAUDE_CODE_OAUTH_TOKEN"]` so the NEXT claude subprocess uses the rotated token while in-flight subprocesses keep theirs; persists to the writable-layer override `/var/lib/trinity/oauth-token` (0600). Does NOT touch `.env`/`.mcp.json`. See [Subscription Token Rotation](#subscription-token-rotation-via-hot-reload-1089)
 - `/api/chat/session` - Context window stats
 - `/api/files`, `/api/files/download` (100MB limit), `/api/files/mkdir` (workspace-confined, #37)
 
 The agent server also runs two loops: the 15-min git `auto_sync` heartbeat (see [Git Sync Health](#git-sync-health-389390)) and the 5s liveness heartbeat (see [Heartbeat Liveness](#heartbeat-liveness-reliability-004-307)).
 
+**Durable subscription-token override (#1089):** `startup.sh` exports `CLAUDE_CODE_OAUTH_TOKEN` from `/var/lib/trinity/oauth-token` (when present, non-empty) **before** launching the agent server, so a token rotated via hot-reload survives a plain stop+start (a fleet restart via `routers/ops.py` does a raw `container_stop`+`container_start` that bypasses `start_agent_internal` and would otherwise revert to the baked `Config.Env` token). The path is deliberately on the writable layer, **not** under the persisted `/home/developer` volume: it survives `stop`→`start` (same container) but is wiped on recreate (fresh layer), so a DB-driven recreate cleanly re-bakes `Config.Env` from the DB and the stale override is gone — self-reconciling, no marker logic. Dir created+chowned to UID 1000 in the base-image Dockerfile.
+
 **Template-supplied pre-check** (SCHED-COND-001, #454): if the template ships an executable `~/.trinity/pre-check`, the backend's internal endpoint `POST /api/internal/agents/{name}/pre-check` runs it via `docker exec` before a cron-triggered chat. Language-agnostic — interpreter selected by shebang. The hook's stdout becomes the chat message; empty stdout + exit 0 records a skipped execution (Claude never invoked). Uses the same `execute_command_in_container` primitive as `git_service.py`, `ssh_service.py`, and the agent terminal — no agent-server HTTP endpoint.
 
 **Persistent chat:** all chat messages auto-saved to SQLite (`chat_sessions`, `chat_messages`) with full observability (costs, context, tool calls, execution time); sessions survive container restarts/deletions; users see only their own messages (admins see all).
@@ -367,6 +370,12 @@ agent:heartbeat:misses:{name} → STRING(int), ~60s TTL. Consecutive-miss counte
 
 Trigger-boundary dedup — policy in Architectural Invariant #18, table DDL under `idempotency_keys`. `services/idempotency_service.py` (key derivation + `begin`/`complete`/`fail`) over `db/idempotency.py`. The `(scope, key)` PRIMARY KEY is the atomic claim: `claim()` INSERTs an `in_flight` row; a concurrent loser catches `IntegrityError` and reads the surviving row — cross-process safe across uvicorn workers and the standalone scheduler (shared SQLite file). Lifecycle: `claim` → (`attach_execution`) → `complete` (stores `response_snapshot` for replay) or `release` (deletes the in_flight row so a failed attempt can retry; never deletes a `completed` row). Rows older than 24h are treated as expired and re-claimed; the cleanup service purges them (`idempotency_purge_expired`). Duplicates within 24h short-circuit with the original result + `X-Idempotent-Replay: true`; an in-flight duplicate returns 409. Fail-open — a key never blocks a real execution.
 
+### Subscription Token Rotation via Hot-Reload (#1089)
+
+Rotating an agent's subscription token used to recreate the container, making "rotate a credential" and "kill every in-flight turn" the same operation (#1037). Token rotation now hot-reloads the running container; recreate is reserved for image/template/auth-**mode** changes (TARGET_ARCHITECTURE §Agent Runtime). The agent server authenticates Claude purely from `CLAUDE_CODE_OAUTH_TOKEN` (no `.credentials.json`) and is a single uvicorn worker, so mutating its process env makes the **next** subprocess use the new token while in-flight subprocesses finish on the old one.
+
+Backend orchestration in `services/subscription_auto_switch.py`: `_hot_reload_subscription_token(agent_name)` POSTs the agent's current DB token to the agent-server `POST /api/credentials/reload-token`, falling back to `_restart_agent` on a 404 (old base image), transport failure, or missing token (`no_container`/`not_running` short-circuit otherwise). Three producer paths converted, all under the #799 `agent_switch_lock`: **auto-switch** (`_perform_auto_switch`, SUB-003), **manual sub→sub reassignment** (`PUT /api/subscriptions/agents/{name}` — auth-mode changes none/api-key→sub still recreate), and **key rollover** (`reload_subscription_for_all_agents(sub_id)` fans a best-effort reload across every running agent on a re-registered subscription). Durable override (`/var/lib/trinity/oauth-token`) + `startup.sh` read make a rotation survive a plain restart — see the agent-server [Durable subscription-token override](#agent-containers) note. Agent-server endpoint mirroring follows Invariant #5.
+
 ### Real-time Delivery (RELIABILITY-003, #306)
 
 **Transport** (`event_bus.py`): Redis Streams. `ConnectionManager`/`FilteredWebSocketManager` are thin shims that `XADD` to the MAXLEN-trimmed `trinity:events` stream; one `StreamDispatcher` per backend process runs `XREAD BLOCK` and fans out to registered clients, evicting a client after 3 consecutive delivery failures. New broadcast sites keep calling `manager.broadcast(...)` / `filtered_manager.broadcast_filtered(...)` — never publish to the stream directly (Invariant #10).
diff --git a/docs/memory/feature-flows.md b/docs/memory/feature-flows.md
index d78d6df4..f9c198f1 100644
--- a/docs/memory/feature-flows.md
+++ b/docs/memory/feature-flows.md
@@ -11,6 +11,7 @@
 
 | Date | ID | Feature | Flow |
 |------|-----|---------|------|
+| 2026-06-13 | #1089 | feat: subscription token rotation via **hot-reload, not container recreate** — a dedicated agent-server `POST /api/credentials/reload-token` mutates the running container's `CLAUDE_CODE_OAUTH_TOKEN` env so the next claude subprocess uses the rotated token while in-flight turns finish (closes the #1037 collateral-kill class). Three producer paths converted under the #799 `agent_switch_lock`: auto-switch (SUB-003), manual sub→sub reassignment (auth-mode changes still recreate), and key-rollover fan-out on `POST /api/subscriptions` upsert. Durable writable-layer override (`/var/lib/trinity/oauth-token` + `startup.sh` read) survives a plain restart; recreate self-reconciles to the DB token. Falls back to the old `_restart_agent` recreate on a 404 (old base image). | [subscription-auto-switch.md](feature-flows/subscription-auto-switch.md) |
 | 2026-06-10 | #1130 | fix: retired `gemini-2.0-flash` replaced with env-configurable models — `GEMINI_TEXT_MODEL` (image-gen prompt refinement) + `GEMINI_TRANSCRIPTION_MODEL` (Telegram voice), both default `gemini-3.5-flash`, defined in `config.py`, empty-string-safe wiring in both compose files (#1076 pattern). | [image-generation.md](feature-flows/image-generation.md), [telegram-integration.md](feature-flows/telegram-integration.md) |
 | 2026-06-10 | #1108 | feat(ui): Agent Detail **Guardrails** tab renamed to **Settings** — sectioned config home. New `components/settings/SettingsPanel.vue` renders `GuardrailsPanel` unchanged as section #1; future per-agent settings land as additive sections, not new tabs. `?tab=guardrails` deep links alias to `settings` via `TAB_ALIASES`. Pure frontend. | [agent-guardrails.md](feature-flows/agent-guardrails.md) |
 | 2026-06-10 | #1114 | feat(ui): Agent Detail tabs overflow into a **"More ▾"** dropdown instead of horizontal scroll. New reusable `components/OverflowTabs.vue` ("priority+" pattern): a hidden, zero-layout mirror row measures every `{id,label,badge?}` tab's width (+ a worst-case "More" button) so the visible row renders as many tabs as fit and collapses the trailing remainder into a right-aligned disclosure menu. Re-measures on container resize (`ResizeObserver` on the outer wrapper, width-diff-guarded + rAF-debounced) and after `document.fonts.ready`; re-measures on tab/label/badge changes via a derived-signature `watch` (`flush:'post'`). Defaults to all-inline before the first measure (no first-paint snap; no "More" when everything fits). Active-in-overflow reflected on the trigger (active underline + dot), tab order never reshuffled. Plain `<button>` disclosure (NOT `role="menu"`): Tab traverses, Escape closes + returns focus, outside-`pointerdown` closes; dark-mode aware. `v-model` over `AgentDetail`'s `activeTab` string ref, so `?tab=` deep-linking is unaffected. Pure frontend; no backend/store changes. Generic enough for `Operations.vue` to adopt next. 6 Playwright e2e behaviors. | [agent-detail-tab-overflow.md](feature-flows/agent-detail-tab-overflow.md), [agent-overview-dashboard.md](feature-flows/agent-overview-dashboard.md) |
diff --git a/docs/memory/feature-flows/subscription-auto-switch.md b/docs/memory/feature-flows/subscription-auto-switch.md
index a1abe8fd..d92fbdb3 100644
--- a/docs/memory/feature-flows/subscription-auto-switch.md
+++ b/docs/memory/feature-flows/subscription-auto-switch.md
@@ -1,8 +1,8 @@
 # Feature Flow: Subscription Auto-Switch (SUB-003)
 
 > **Requirement**: `docs/requirements/SUB-003-subscription-auto-switch.md`
-> **Issue**: #153, threshold + scope update #441
-> **Status**: Implemented (2026-03-21), updated 2026-04-25 (#441)
+> **Issue**: #153, threshold + scope update #441, hot-reload #1089
+> **Status**: Implemented (2026-03-21), updated 2026-04-25 (#441), 2026-06-13 (#1089 — switch hot-reloads instead of recreating the container)
 
 ## Overview
 
@@ -34,11 +34,26 @@ Find best alternative subscription (fewest agents, not rate-limited in last 2h)
     ↓
 No alternative? → return None (log warning)
     ↓ Found
-Switch: DB update + container restart + log activity + send notification
+Switch: DB update + token HOT-RELOAD (not container recreate, #1089) + log activity + send notification
     ↓
 Return switch result → caller surfaces 429/503 with auto_switch info + retry hint
 ```
 
+## Token Application: Hot-Reload, not Recreate (#1089)
+
+The switch step (and the manual `PUT /api/subscriptions/agents/{name}` sub→sub
+path, and the `POST /api/subscriptions` key-rollover upsert) applies the new
+token via `_hot_reload_subscription_token(agent_name)` — a POST to the
+agent-server `POST /api/credentials/reload-token` that mutates the running
+container's `CLAUDE_CODE_OAUTH_TOKEN` env. The **next** Claude subprocess uses
+the new token while **in-flight** turns finish on the old one, so a rotation no
+longer kills every parallel execution (#1037). Falls back to the previous
+`_restart_agent` recreate on a 404 (old base image), transport failure, or a
+missing token. Durability across a plain restart is handled by the
+`/var/lib/trinity/oauth-token` writable-layer override that `startup.sh` reads
+before launching the agent server. Canonical home: architecture.md
+§"Subscription Token Rotation via Hot-Reload".
+
 ## Trigger Surface
 
 | Layer | Signal | Failure kind |
@@ -69,7 +84,9 @@ import from `backend.services`. Keep the two in sync when editing either.
 | Router | `src/backend/routers/chat.py` | 429 interception in chat proxy + background tasks |
 | Frontend | `src/frontend/src/views/Settings.vue` | Toggle in Subscriptions section |
 | Tests | `tests/test_subscription_auto_switch.py` | Smoke tests |
-| Tests | `tests/unit/test_subscription_auto_switch_pingpong.py` | Unit regression for #444 ping-pong prevention; `TestRateLimitAging` (#476) pins 2h-window correctness |
+| Tests | `tests/unit/test_subscription_auto_switch_pingpong.py` | Unit regression for #444 ping-pong prevention; `TestRateLimitAging` (#476) pins 2h-window correctness; `TestHotReloadSwitch` + `TestKeyRolloverFanOut` (#1089) pin the hot-reload helper, auto-switch wire-in, and key-rollover fan-out |
+| Tests | `tests/unit/test_subscription_reassign_hotreload.py` | #1089 — manual sub→sub hot-reload under the lock (no `container_stop`), mode-change still recreates, register/upsert key-rollover fan-out |
+| Tests | `tests/unit/test_reload_token_endpoint.py` | #1089 — agent-server `POST /api/credentials/reload-token`: sets env, writes the `/var/lib/trinity/oauth-token` override (0600), no `.env` write, `remove_api_key` pops `ANTHROPIC_API_KEY`, empty token → 400 |
 | Tests | `tests/unit/test_subscription_auto_switch_no_cred_import.py` | Chain-level regression for #606 — pins `_restart_agent → start_agent_internal → inject_assigned_credentials` reaches the `lifecycle.py:155` `subscription_mode` short-circuit and never re-enters file-based credential import |
 | Tests | `tests/unit/test_iso_cutoff.py` | Format parity between `iso_cutoff(N)` and `utc_now_iso()` (#476) |
 | Util | `src/backend/utils/helpers.py::iso_cutoff` | Canonical cutoff helper for ISO-Z TEXT comparisons (#476) |
diff --git a/docs/memory/requirements.md b/docs/memory/requirements.md
index 78d44834..20e741f9 100644
--- a/docs/memory/requirements.md
+++ b/docs/memory/requirements.md
@@ -1560,6 +1560,7 @@ All subsections 18.1–18.10 were deleted with the code. Flow docs archived at `
   - `src/backend/routers/chat.py` - 429 interception hooks
   - `src/frontend/src/views/Settings.vue` - Toggle UI
 - **Negative markers on `is_auth_failure` (#904, 2026-05-21)**: substring match on `AUTH_INDICATORS` now short-circuits to False when the error message also contains an unambiguous signal-kill / OOM / timeout marker (`sigkill`, `sigterm`, `sigint`, `exit code -9`, `exit code 137`, `exit code 143`, `out of memory`, `oom`, `memory cgroup`, `terminated by`, `killed by`). Prevents the SUB-003 trigger from firing on cgroup OOM kills whose detail string happens to contain a word like "token" or "authentication" via downstream wrapping. The same exclusion list lives in `src/scheduler/service.py:_is_auth_failure` to keep the two surfaces from drifting (see §10.4.1).
+- **Hot-reload, not recreate (#1089, 2026-06-13)**: the auto-switch no longer recreates the container — `_perform_auto_switch` hot-reloads the new token in place so in-flight turns on the agent survive. See §20.6.
 
 ### 20.5 Per-Subscription Usage Tracking (SUB-004)
 - **Status**: ✅ Implemented (2026-04-01)
@@ -1580,6 +1581,32 @@ All subsections 18.1–18.10 were deleted with the code. Flow docs archived at `
   - `src/backend/db/chat.py` - Session creation with subscription_id
   - `src/frontend/src/views/Settings.vue` - Usage display (if applicable)
 
+### 20.6 Credential Rotation via Hot-Reload, not Container Recreate (#1089)
+- **Status**: ✅ Implemented (2026-06-13)
+- **GitHub Issue**: #1089
+- **Extends**: SUB-002 / SUB-003
+- **Priority**: HIGH (`theme-reliability`)
+- **Builds on**: #799 (per-agent `agent_switch_lock`)
+- **Description**: Rotating an agent's subscription token used to **recreate the container**, making "rotate a credential" and "kill every in-flight turn" the same operation (#1037 collateral kills — one 429 on a shared subscription would auto-switch and destroy every parallel execution). Token rotation now goes through a surgical hot-reload of the running container; recreate is reserved for image/template/auth-**mode** changes. This removes the credential↔execution collision class structurally (TARGET_ARCHITECTURE §Agent Runtime).
+- **Mechanism**: the agent server spawns Claude via `subprocess.Popen(..., env={**os.environ, ...})` and authenticates purely from the `CLAUDE_CODE_OAUTH_TOKEN` env var (no `.credentials.json` write); it is a single uvicorn worker. Mutating the agent-server process `os.environ["CLAUDE_CODE_OAUTH_TOKEN"]` makes the **next** Claude subprocess use the new token; **in-flight** subprocesses keep their already-inherited old token and finish.
+- **Rotation paths converted to hot-reload**:
+  1. **Auto-switch** (SUB-003): `_perform_auto_switch` hot-reloads instead of `_restart_agent` (runs inside the #799 `agent_switch_lock`).
+  2. **Manual reassignment** (`PUT /api/subscriptions/agents/{name}`): a sub→sub swap hot-reloads under the lock; an auth-**mode** change (none/api-key → subscription) still recreates so `ANTHROPIC_API_KEY` is dropped and the OAuth token is baked into `Config.Env`.
+  3. **Key rollover** (`POST /api/subscriptions` upsert): re-registering a subscription's token fans a best-effort hot-reload out to every running agent on that subscription (one agent's failure never fails the upsert nor blocks the others).
+- **Key Features**:
+  - Agent-server endpoint `POST /api/credentials/reload-token` (`{token, remove_api_key}`) — mutates `os.environ` + persists the token to the writable-layer override; does **not** rewrite `.env`/`.mcp.json` or re-inject Trinity MCP.
+  - **Durable override (F2)**: the token is written to `/var/lib/trinity/oauth-token` (0600), deliberately **not** under `/home/developer` (the persisted workspace volume). `startup.sh` exports it before launching the agent server, so a plain fleet restart (`ops.py` raw stop+start, which bypasses `start_agent_internal`) keeps the rotated token. **Self-reconciling by Docker semantics**: the writable layer survives `stop`→`start` but is wiped on recreate (fresh layer), so a DB-driven recreate re-bakes `Config.Env` (DB token) and the stale override is gone — no marker logic.
+  - **Back-compat fallback**: running containers on an older base image return **404** for the endpoint → the backend falls back to `_restart_agent` (identical to pre-#1089 behavior). Per #1037, recreate stays out of scope; the fallback inherits whatever #1037 lands. An agent only gains the endpoint once recreated onto a rebuilt base image (no automatic fleet-wide adoption).
+- **Backend helpers** (`services/subscription_auto_switch.py`): `_hot_reload_subscription_token(agent_name)` (POST + restart fallback on 404/transport/no-token; `no_container`/`not_running` short-circuits) and `reload_subscription_for_all_agents(subscription_id)` (key-rollover fan-out under the lock).
+- **Files**:
+  - `docker/base-image/agent_server/routers/credentials.py` - `reload-token` endpoint + writable-layer override write
+  - `docker/base-image/agent_server/models.py` - `TokenReloadRequest`/`TokenReloadResponse`
+  - `docker/base-image/Dockerfile` - `mkdir+chown /var/lib/trinity` (Invariant #17 non-root)
+  - `docker/base-image/startup.sh` - export override token before agent-server launch
+  - `src/backend/services/subscription_auto_switch.py` - hot-reload helper + fan-out + auto-switch wire-in
+  - `src/backend/routers/subscriptions.py` - manual sub→sub under lock + key-rollover fan-out
+- **Known limitations**: cross-worker race on the process-local `agent_switch_lock` (prod `--workers 2`) is flagged for #1166/#799 (escalate to Redis `SETNX`); a bulk `delete_subscription` still leaves the deleted token live until next start (pre-existing, out of scope). Both self-heal via the durable override / `check_api_key_env_matches` reconciliation.
+
 ---
 
 ## Non-Functional Requirements
diff --git a/docs/planning/TARGET_ARCHITECTURE.md b/docs/planning/TARGET_ARCHITECTURE.md
index 7eb107f6..07637faf 100644
--- a/docs/planning/TARGET_ARCHITECTURE.md
+++ b/docs/planning/TARGET_ARCHITECTURE.md
@@ -275,7 +275,7 @@ When a single container's `max_parallel_tasks` ceiling — bounded by container
 
 **Post-execution hooks**: companion to the existing pre-check hook. `~/.trinity/post-check` runs after every task completion (language-agnostic, shebang-selected). Enables custom alerting, output validation, or state transitions defined by the agent template.
 
-**Credential rotation via hot-reload, not recreate**: rotating an agent's token (subscription auto-switch, key rollover) goes through the existing `/api/credentials/update` hot-reload endpoint and does **not** recreate the container. "Rotate a credential" and "kill every in-flight turn" must stop being the same operation (#1037) — container recreate is reserved for image/template changes, where the pre-recreate "stop pulling, finish in-flight" handshake applies. This removes the credential↔execution collision class structurally instead of recovering from it after the fact.
+**Credential rotation via hot-reload, not recreate** ✅ *Implemented (#1089, 2026-06-13)*: rotating an agent's subscription token (auto-switch, manual sub→sub reassignment, key rollover) goes through a dedicated agent-server `POST /api/credentials/reload-token` endpoint and does **not** recreate the container. (A new surgical endpoint, not the existing `/api/credentials/update` — the latter destructively rewrites `.env`/`.mcp.json`; the token is an env-only credential.) "Rotate a credential" and "kill every in-flight turn" are no longer the same operation (#1037) — container recreate is reserved for image/template/auth-**mode** changes, where the pre-recreate "stop pulling, finish in-flight" handshake applies. A writable-layer durable override (`/var/lib/trinity/oauth-token`, read by `startup.sh`) keeps a rotation across a plain restart; recreate self-reconciles to the DB token (fresh layer wipes the override). This removes the credential↔execution collision class structurally instead of recovering from it after the fact. See architecture.md §"Subscription Token Rotation via Hot-Reload".
 
 ---
 
diff --git a/src/backend/routers/subscriptions.py b/src/backend/routers/subscriptions.py
index 4b673dde..4a1d908d 100644
--- a/src/backend/routers/subscriptions.py
+++ b/src/backend/routers/subscriptions.py
@@ -89,6 +89,21 @@ async def register_subscription(
         )
 
         logger.info(f"Registered subscription '{request.name}' by {current_user.username}")
+
+        # #1089 (F1): a re-register (upsert) is a key rollover — fan a best-effort
+        # hot-reload out to every running agent on this subscription so they pick
+        # up the new token without a recreate. Swallowed on failure: the fan-out
+        # must never fail the registration. (No-op on first registration — no
+        # agents are assigned yet.)
+        try:
+            from services.subscription_auto_switch import reload_subscription_for_all_agents
+            await reload_subscription_for_all_agents(subscription.id)
+        except Exception as e:
+            logger.error(
+                f"[#1089] key-rollover hot-reload fan-out failed for "
+                f"subscription '{request.name}': {e}"
+            )
+
         return subscription
 
     except HTTPException:
@@ -228,9 +243,15 @@ async def assign_subscription_to_agent(
     """
     Assign a subscription to an agent.
 
-    Owner access required. If the agent is running, it will be restarted
-    so the container is recreated with `CLAUDE_CODE_OAUTH_TOKEN` env var
-    and `ANTHROPIC_API_KEY` removed.
+    Owner access required. The token is applied to a running agent based on the
+    kind of change (#1089):
+      - sub → sub swap: hot-reloaded in place via /api/credentials/reload-token,
+        so in-flight turns survive (no container recreate);
+      - none/api-key → subscription (an auth-MODE change): the container is
+        recreated so `ANTHROPIC_API_KEY` is dropped and `CLAUDE_CODE_OAUTH_TOKEN`
+        is baked into Config.Env.
+    Both run under the #799 per-agent switch lock so a manual reassignment can't
+    interleave with a concurrent auto-switch on the same agent.
     """
     # Owner or admin only — shared users must not mutate subscription assignments
     if not db.can_user_share_agent(current_user.username, agent_name):
@@ -241,43 +262,65 @@ async def assign_subscription_to_agent(
     if not subscription:
         raise HTTPException(status_code=404, detail=f"Subscription '{subscription_name}' not found")
 
-    try:
-        db.assign_subscription_to_agent(agent_name, subscription.id)
+    # #1089: snapshot the agent's CURRENT subscription BEFORE reassigning. A
+    # sub→sub swap (old_sub_id is not None) can hot-reload the token in place; an
+    # auth-mode change (old_sub_id is None) still needs the container recreated.
+    old_sub_id = db.get_agent_subscription_id(agent_name)
 
-        logger.info(
-            f"Assigned subscription '{subscription_name}' to agent '{agent_name}' "
-            f"by {current_user.username}"
+    try:
+        from services.subscription_auto_switch import (
+            agent_switch_lock,
+            _hot_reload_subscription_token,
         )
 
-        # If agent is running, restart it so the container is recreated
-        # with CLAUDE_CODE_OAUTH_TOKEN env var and without ANTHROPIC_API_KEY
-        from services.docker_service import get_agent_container, get_agent_status_from_container
-        from services.docker_utils import container_stop
-        from services.agent_service import start_agent_internal
-
-        container = get_agent_container(agent_name)
-        restart_result = None
-        injection_result = None
-
-        if container:
-            agent_status = get_agent_status_from_container(container)
-            if agent_status.status == "running":
-                try:
-                    await container_stop(container)
-                    await start_agent_internal(agent_name)
-                    restart_result = "success"
-                    injection_result = {"status": "success"}
-                    logger.info(
-                        f"Restarted agent '{agent_name}' to apply subscription token"
-                    )
-                except Exception as e:
-                    logger.error(f"Failed to restart agent '{agent_name}' for subscription: {e}")
-                    restart_result = f"failed: {e}"
-                    injection_result = {"status": "failed", "error": str(e)}
+        # #799/#1089: serialize the assign + apply window per agent so a manual
+        # reassignment can't interleave with a concurrent auto-switch.
+        async with await agent_switch_lock(agent_name):
+            db.assign_subscription_to_agent(agent_name, subscription.id)
+
+            logger.info(
+                f"Assigned subscription '{subscription_name}' to agent '{agent_name}' "
+                f"by {current_user.username}"
+            )
+
+            restart_result = None
+            injection_result = None
+
+            if old_sub_id is not None:
+                # sub → sub: hot-reload the token without recreating the container.
+                # The helper itself short-circuits ("not_running"/"no_container")
+                # for stopped agents and falls back to a recreate on 404 / transport
+                # failure / missing token.
+                restart_result = await _hot_reload_subscription_token(agent_name)
+                injection_result = {"status": restart_result}
             else:
-                injection_result = {"status": "agent_not_running"}
-        else:
-            injection_result = {"status": "agent_not_running"}
+                # none/api-key → subscription: an auth-MODE change still requires a
+                # recreate so ANTHROPIC_API_KEY is dropped and the OAuth token is
+                # baked into Config.Env.
+                from services.docker_service import get_agent_container, get_agent_status_from_container
+                from services.docker_utils import container_stop
+                from services.agent_service import start_agent_internal
+
+                container = get_agent_container(agent_name)
+                if container:
+                    agent_status = get_agent_status_from_container(container)
+                    if agent_status.status == "running":
+                        try:
+                            await container_stop(container)
+                            await start_agent_internal(agent_name)
+                            restart_result = "success"
+                            injection_result = {"status": "success"}
+                            logger.info(
+                                f"Restarted agent '{agent_name}' to apply subscription token"
+                            )
+                        except Exception as e:
+                            logger.error(f"Failed to restart agent '{agent_name}' for subscription: {e}")
+                            restart_result = f"failed: {e}"
+                            injection_result = {"status": "failed", "error": str(e)}
+                    else:
+                        injection_result = {"status": "agent_not_running"}
+                else:
+                    injection_result = {"status": "agent_not_running"}
 
         return {
             "success": True,
diff --git a/src/backend/services/subscription_auto_switch.py b/src/backend/services/subscription_auto_switch.py
index 7ab05767..41abd1ac 100644
--- a/src/backend/services/subscription_auto_switch.py
+++ b/src/backend/services/subscription_auto_switch.py
@@ -270,8 +270,11 @@ async def _perform_auto_switch(
     # utils/helpers.py, issue #476) and the 24h cleanup in
     # services/cleanup_service.py removes them from disk.
 
-    # Restart agent container to apply new subscription token
-    restart_result = await _restart_agent(agent_name)
+    # Rotate the subscription token on the running container via hot-reload so
+    # in-flight turns survive the switch (#1089). Falls back to a full restart on
+    # a 404 (old base image without the endpoint), transport failure, or when no
+    # token is resolvable — identical to the previous recreate behavior.
+    restart_result = await _hot_reload_subscription_token(agent_name)
 
     # Log activity event
     from services.activity_service import activity_service
@@ -355,3 +358,98 @@ async def _restart_agent(agent_name: str) -> str:
     except Exception as e:
         logger.error(f"[SUB-003] Failed to restart agent '{agent_name}': {e}")
         return f"failed: {e}"
+
+
+async def _hot_reload_subscription_token(agent_name: str) -> str:
+    """Push the agent's current DB subscription token to the running container
+    via ``POST /api/credentials/reload-token`` (#1089).
+
+    The agent server mutates its own ``os.environ["CLAUDE_CODE_OAUTH_TOKEN"]``,
+    so the NEXT claude subprocess uses the rotated token while in-flight turns
+    keep their already-inherited old token and finish — "rotate a credential"
+    is no longer the same operation as "kill every running turn".
+
+    Falls back to the full ``_restart_agent`` recreate path (today's behavior,
+    no regression) on:
+      - a 404 — an old base image that predates the endpoint,
+      - any transport / circuit failure (``AgentClientError`` family), or
+      - no resolvable token for the agent's current subscription.
+    Returns ``"no_container"`` / ``"not_running"`` when the agent is not a
+    running container, mirroring ``_restart_agent``.
+    """
+    try:
+        from services.docker_service import (
+            get_agent_container,
+            get_agent_status_from_container,
+        )
+        from services.agent_client import get_agent_client, AgentClientError
+
+        container = get_agent_container(agent_name)
+        if not container:
+            return "no_container"
+        if get_agent_status_from_container(container).status != "running":
+            return "not_running"
+
+        sub_id = db.get_agent_subscription_id(agent_name)
+        token = db.get_subscription_token(sub_id) if sub_id else None
+        if not token:
+            # No token to push (e.g. assignment cleared mid-flight). Fall back to
+            # the recreate path, which re-bakes Config.Env from the DB.
+            return await _restart_agent(agent_name)
+
+        client = get_agent_client(agent_name)
+        try:
+            # remove_api_key=False is intentional: subscription-backed agents never
+            # carry ANTHROPIC_API_KEY in env (popped at create time, lifecycle.py).
+            # The param is kept for a future mode-change-via-hot-reload caller.
+            resp = await client.post(
+                "/api/credentials/reload-token",
+                json={"token": token, "remove_api_key": False},
+                timeout=10.0,
+            )
+        except AgentClientError as e:
+            logger.warning(
+                f"[SUB-003] hot-reload transport failure for '{agent_name}': {e}; "
+                f"falling back to restart"
+            )
+            return await _restart_agent(agent_name)
+
+        if resp.status_code >= 400:  # 404 = old base image without the endpoint
+            logger.info(
+                f"[SUB-003] hot-reload returned HTTP {resp.status_code} for "
+                f"'{agent_name}'; falling back to restart"
+            )
+            return await _restart_agent(agent_name)
+
+        logger.info(f"[SUB-003] Hot-reloaded subscription token for '{agent_name}' (no recreate)")
+        return "hot_reloaded"
+    except Exception as e:
+        logger.error(
+            f"[SUB-003] hot-reload error for '{agent_name}': {e}; falling back to restart"
+        )
+        return await _restart_agent(agent_name)
+
+
+async def reload_subscription_for_all_agents(subscription_id: str) -> dict[str, str]:
+    """Hot-reload the subscription token on every running agent assigned to
+    `subscription_id` (#1089 key rollover — re-registering a subscription's
+    token via the `/api/subscriptions` upsert).
+
+    Best-effort per agent, each under the #799 per-agent switch lock so a
+    rollout can't interleave with a concurrent auto-switch: a failure on one
+    agent is logged and does NOT abort the fan-out or block the others. Stopped
+    agents are skipped by the helper (`not_running`) — they pick up the new
+    token on next start (Config.Env is re-baked from the DB on recreate).
+    Returns ``{agent_name: result}`` for observability.
+    """
+    results: dict[str, str] = {}
+    for agent_name in db.get_agents_by_subscription(subscription_id):
+        try:
+            async with await agent_switch_lock(agent_name):
+                results[agent_name] = await _hot_reload_subscription_token(agent_name)
+        except Exception as e:
+            logger.error(
+                f"[SUB-003] key-rollover hot-reload failed for '{agent_name}': {e}"
+            )
+            results[agent_name] = f"failed: {e}"
+    return results
diff --git a/tests/unit/test_reload_token_endpoint.py b/tests/unit/test_reload_token_endpoint.py
new file mode 100644
index 00000000..3f0fd6ca
--- /dev/null
+++ b/tests/unit/test_reload_token_endpoint.py
@@ -0,0 +1,121 @@
+"""
+Unit tests for the agent-server hot-reload-token endpoint (#1089).
+
+``POST /api/credentials/reload-token`` mutates the live agent-server process
+env so the NEXT claude subprocess uses the rotated subscription token (in-flight
+turns keep their already-inherited old token and finish), and persists the token
+to the writable-layer override (``/var/lib/trinity/oauth-token``, 0600) so it
+survives a plain stop+start (F2 durability). It must NOT rewrite ``.env`` /
+``.mcp.json`` or re-inject Trinity MCP — those are the destructive whole-file
+flows owned by ``/api/credentials/update`` and ``/api/credentials/inject``.
+
+Module: docker/base-image/agent_server/routers/credentials.py
+
+`agent_server` is registered as a namespace package by tests/unit/conftest.py
+(``_preload_real_agent_server``), so the real base-image router imports directly.
+"""
+
+import os
+import stat
+
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+
+from agent_server.routers import credentials as cred_router
+
+
+@pytest.fixture
+def client(tmp_path, monkeypatch):
+    """TestClient over the credentials router with the writable-layer override
+    redirected to a tmp path (the host has no /var/lib/trinity), the sanitizer
+    refresh stubbed (don't read the host ~/.env), and the MCP re-inject spied so
+    we can assert the destructive whole-file flow is never triggered."""
+    override = tmp_path / "oauth-token"
+    monkeypatch.setattr(cred_router, "_TOKEN_OVERRIDE", override)
+
+    refresh_calls: list[int] = []
+    monkeypatch.setattr(
+        cred_router, "refresh_credential_values", lambda: refresh_calls.append(1)
+    )
+    inject_calls: list[int] = []
+    monkeypatch.setattr(
+        cred_router,
+        "inject_trinity_mcp_if_configured",
+        lambda: (inject_calls.append(1), False)[1],
+    )
+
+    # The endpoint mutates os.environ directly (not via monkeypatch); snapshot
+    # the two keys it touches and restore them so nothing leaks across tests.
+    saved = {k: os.environ.get(k) for k in ("CLAUDE_CODE_OAUTH_TOKEN", "ANTHROPIC_API_KEY")}
+
+    app = FastAPI()
+    app.include_router(cred_router.router)
+    c = TestClient(app)
+    c._override = override  # type: ignore[attr-defined]
+    c._refresh_calls = refresh_calls  # type: ignore[attr-defined]
+    c._inject_calls = inject_calls  # type: ignore[attr-defined]
+    try:
+        yield c
+    finally:
+        for k, v in saved.items():
+            if v is None:
+                os.environ.pop(k, None)
+            else:
+                os.environ[k] = v
+
+
+def test_reload_sets_env_and_writes_durable_override(client):
+    """Happy path: env mutated for the next subprocess + durable override
+    written 0600 + sanitizer refreshed + NO destructive MCP re-inject."""
+    os.environ.pop("CLAUDE_CODE_OAUTH_TOKEN", None)
+
+    resp = client.post(
+        "/api/credentials/reload-token", json={"token": "sk-ant-oat01-rotated"}
+    )
+
+    assert resp.status_code == 200
+    assert resp.json() == {"status": "success", "reloaded": True}
+    # env mutated so the NEXT claude subprocess inherits the new token
+    assert os.environ["CLAUDE_CODE_OAUTH_TOKEN"] == "sk-ant-oat01-rotated"
+    # durable override written (survives a plain stop+start) with 0600 perms
+    assert client._override.read_text() == "sk-ant-oat01-rotated"
+    assert stat.S_IMODE(client._override.stat().st_mode) == 0o600
+    # sanitizer redaction set refreshed; the whole-file MCP re-inject NOT done
+    assert client._refresh_calls == [1]
+    assert client._inject_calls == []
+
+
+def test_reload_does_not_write_env_or_other_files(client):
+    """The endpoint writes ONLY the override — no sibling .env / .mcp.json
+    (proves it is not reusing the destructive /update or /inject flow)."""
+    client.post("/api/credentials/reload-token", json={"token": "tok"})
+
+    siblings = {p.name for p in client._override.parent.iterdir()}
+    assert siblings == {"oauth-token"}
+
+
+def test_remove_api_key_true_pops_anthropic_key(client, monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api-should-go")
+
+    resp = client.post(
+        "/api/credentials/reload-token",
+        json={"token": "tok", "remove_api_key": True},
+    )
+
+    assert resp.status_code == 200
+    assert "ANTHROPIC_API_KEY" not in os.environ
+
+
+def test_remove_api_key_defaults_false_preserves_key(client, monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api-stays")
+
+    resp = client.post("/api/credentials/reload-token", json={"token": "tok"})
+
+    assert resp.status_code == 200
+    assert os.environ["ANTHROPIC_API_KEY"] == "sk-ant-api-stays"
+
+
+def test_empty_token_returns_400(client):
+    resp = client.post("/api/credentials/reload-token", json={"token": ""})
+    assert resp.status_code == 400
diff --git a/tests/unit/test_subscription_auto_switch_pingpong.py b/tests/unit/test_subscription_auto_switch_pingpong.py
index 649aa2af..7436767e 100644
--- a/tests/unit/test_subscription_auto_switch_pingpong.py
+++ b/tests/unit/test_subscription_auto_switch_pingpong.py
@@ -534,3 +534,249 @@ async def test_no_switch_when_agent_has_no_subscription(self, svc):
         )
         assert result is None
         assert svc._spy_calls == []
+
+
+# =============================================================================
+# #1089: hot-reload subscription token (rotate without recreating the container)
+# =============================================================================
+#
+# `_hot_reload_subscription_token` pushes the agent's current DB subscription
+# token to the running container via POST /api/credentials/reload-token, so the
+# NEXT claude subprocess uses the new token while in-flight turns keep their
+# already-inherited old token and finish. It falls back to the full
+# `_restart_agent` path (today's behavior) on:
+#   - transport failure (AgentClientError / AgentNotReachableError),
+#   - HTTP >= 400 (a 404 means an old base image without the endpoint), or
+#   - no resolvable token.
+# Early-returns `no_container` / `not_running` exactly like `_restart_agent`.
+
+import asyncio  # noqa: E402  (used by the hot-reload + key-rollover tests below)
+import types as _types  # noqa: E402  (module-level helpers for the tests below)
+from unittest.mock import AsyncMock  # noqa: E402
+
+
+def _docker_stub(*, container: object = object(), status: str = "running"):
+    """Fake `services.docker_service` exposing the two helper lookups."""
+    mod = _types.ModuleType("services.docker_service")
+    mod.get_agent_container = lambda name: container
+    _status = _types.SimpleNamespace(status=status)
+    mod.get_agent_status_from_container = lambda c: _status
+    return mod
+
+
+class _StubAgentClientError(Exception):
+    pass
+
+
+class _StubAgentNotReachableError(_StubAgentClientError):
+    pass
+
+
+def _agent_client_stub(*, post):
+    """Fake `services.agent_client`. `post` is bound to `client.post`
+    (an AsyncMock or coroutine fn). `AgentClientError` is the base the helper
+    catches; `AgentNotReachableError` subclasses it (transport-failure case)."""
+    mod = _types.ModuleType("services.agent_client")
+    mod.AgentClientError = _StubAgentClientError
+    mod.AgentNotReachableError = _StubAgentNotReachableError
+    client = _types.SimpleNamespace(post=post)
+    mod.get_agent_client = lambda name: client
+    return mod
+
+
+class TestHotReloadSwitch:
+    """#1089 — the auto-switch path hot-reloads the token instead of recreating
+    the container; it falls back to restart on 404 / transport error / no token,
+    and short-circuits when the agent isn't a running container."""
+
+    @pytest.fixture
+    def auto_switch(self, monkeypatch):
+        import importlib
+
+        stub_db = _install_database_stub()
+        # Token resolution defaults: agent on sub-a, sub-a token present.
+        stub_db.get_agent_subscription_id.return_value = "sub-a"
+        stub_db.get_subscription_token.return_value = "sk-ant-oat01-new-token"
+
+        import services.subscription_auto_switch as mod
+        importlib.reload(mod)
+
+        # Spy the fallback so we can assert when it IS / IS NOT taken.
+        restart_calls: list[str] = []
+
+        async def _restart_spy(agent_name):
+            restart_calls.append(agent_name)
+            return "restarted_fallback"
+
+        monkeypatch.setattr(mod, "_restart_agent", _restart_spy)
+        mod._restart_calls = restart_calls  # type: ignore[attr-defined]
+        mod._stub_db = stub_db  # type: ignore[attr-defined]
+        return mod
+
+    @pytest.mark.asyncio
+    async def test_happy_path_posts_reload_token_no_recreate(self, auto_switch, monkeypatch):
+        post = AsyncMock(return_value=_types.SimpleNamespace(status_code=200))
+        monkeypatch.setitem(sys.modules, "services.docker_service", _docker_stub())
+        monkeypatch.setitem(sys.modules, "services.agent_client", _agent_client_stub(post=post))
+
+        result = await auto_switch._hot_reload_subscription_token("agent-x")
+
+        assert result == "hot_reloaded"
+        assert auto_switch._restart_calls == []  # NO container recreate — in-flight turns survive
+        post.assert_awaited_once()
+        args, kwargs = post.call_args
+        assert args[0] == "/api/credentials/reload-token"
+        assert kwargs["json"] == {"token": "sk-ant-oat01-new-token", "remove_api_key": False}
+
+    @pytest.mark.asyncio
+    async def test_falls_back_to_restart_on_404(self, auto_switch, monkeypatch):
+        """404 = old base image without the endpoint → restart fallback."""
+        post = AsyncMock(return_value=_types.SimpleNamespace(status_code=404))
+        monkeypatch.setitem(sys.modules, "services.docker_service", _docker_stub())
+        monkeypatch.setitem(sys.modules, "services.agent_client", _agent_client_stub(post=post))
+
+        result = await auto_switch._hot_reload_subscription_token("agent-x")
+
+        assert result == "restarted_fallback"
+        assert auto_switch._restart_calls == ["agent-x"]
+
+    @pytest.mark.asyncio
+    async def test_falls_back_to_restart_on_transport_error(self, auto_switch, monkeypatch):
+        post = AsyncMock(side_effect=_StubAgentNotReachableError("connection refused"))
+        monkeypatch.setitem(sys.modules, "services.docker_service", _docker_stub())
+        monkeypatch.setitem(sys.modules, "services.agent_client", _agent_client_stub(post=post))
+
+        result = await auto_switch._hot_reload_subscription_token("agent-x")
+
+        assert result == "restarted_fallback"
+        assert auto_switch._restart_calls == ["agent-x"]
+
+    @pytest.mark.asyncio
+    async def test_falls_back_to_restart_when_no_token(self, auto_switch, monkeypatch):
+        auto_switch._stub_db.get_subscription_token.return_value = None
+        post = AsyncMock()
+        monkeypatch.setitem(sys.modules, "services.docker_service", _docker_stub())
+        monkeypatch.setitem(sys.modules, "services.agent_client", _agent_client_stub(post=post))
+
+        result = await auto_switch._hot_reload_subscription_token("agent-x")
+
+        assert result == "restarted_fallback"
+        assert auto_switch._restart_calls == ["agent-x"]
+        post.assert_not_awaited()  # never reached the POST
+
+    @pytest.mark.asyncio
+    async def test_no_container_short_circuits(self, auto_switch, monkeypatch):
+        monkeypatch.setitem(sys.modules, "services.docker_service", _docker_stub(container=None))
+
+        result = await auto_switch._hot_reload_subscription_token("agent-x")
+
+        assert result == "no_container"
+        assert auto_switch._restart_calls == []
+
+    @pytest.mark.asyncio
+    async def test_not_running_short_circuits(self, auto_switch, monkeypatch):
+        monkeypatch.setitem(sys.modules, "services.docker_service", _docker_stub(status="stopped"))
+
+        result = await auto_switch._hot_reload_subscription_token("agent-x")
+
+        assert result == "not_running"
+        assert auto_switch._restart_calls == []
+
+    @pytest.mark.asyncio
+    async def test_perform_auto_switch_hot_reloads_not_restarts(self, auto_switch, monkeypatch):
+        """The auto-switch wire-in: `_perform_auto_switch` routes through the
+        hot-reload helper, so `restart_result == "hot_reloaded"` and the
+        recreate path (`_restart_agent`) is never taken."""
+        # Stub the heavy local-import targets in `_perform_auto_switch`.
+        act_mod = _types.ModuleType("services.activity_service")
+        act_svc = MagicMock()
+        act_svc.track_activity = AsyncMock(return_value="act-1")
+        act_svc.complete_activity = AsyncMock(return_value=None)
+        act_mod.activity_service = act_svc
+        monkeypatch.setitem(sys.modules, "services.activity_service", act_mod)
+
+        models_mod = _types.ModuleType("models")
+        models_mod.ActivityType = _types.SimpleNamespace(SCHEDULE_END="schedule_end")
+        models_mod.ActivityState = _types.SimpleNamespace(COMPLETED="completed", FAILED="failed")
+        monkeypatch.setitem(sys.modules, "models", models_mod)
+
+        hot_calls: list[str] = []
+
+        async def _hot_spy(agent_name):
+            hot_calls.append(agent_name)
+            return "hot_reloaded"
+
+        monkeypatch.setattr(auto_switch, "_hot_reload_subscription_token", _hot_spy)
+
+        new_sub = MagicMock()
+        new_sub.id = "sub-b"
+        new_sub.name = "sub-B"
+
+        result = await auto_switch._perform_auto_switch(
+            agent_name="agent-x",
+            old_subscription_id="sub-a",
+            old_subscription_name="sub-A",
+            new_subscription=new_sub,
+            failure_kind="rate_limit",
+            event_count=1,
+        )
+
+        assert result["switched"] is True
+        assert result["restart_result"] == "hot_reloaded"
+        assert hot_calls == ["agent-x"]  # hot-reload used
+        assert auto_switch._restart_calls == []  # recreate path NOT taken
+
+
+class TestKeyRolloverFanOut:
+    """#1089 (F1) — re-registering a subscription's token fans a best-effort
+    hot-reload out to every running agent on that subscription. One agent's
+    failure must not abort the fan-out nor block the others."""
+
+    @pytest.fixture
+    def auto_switch(self, monkeypatch):
+        import importlib
+
+        stub_db = _install_database_stub()
+        import services.subscription_auto_switch as mod
+        importlib.reload(mod)
+        mod._stub_db = stub_db  # type: ignore[attr-defined]
+        return mod
+
+    @pytest.mark.asyncio
+    async def test_fan_out_attempts_every_agent_despite_one_failure(self, auto_switch, monkeypatch):
+        auto_switch._stub_db.get_agents_by_subscription.return_value = ["a1", "a2", "a3"]
+
+        seen: list[str] = []
+
+        async def _hot(name):
+            seen.append(name)
+            if name == "a2":
+                raise RuntimeError("boom")
+            return "hot_reloaded"
+
+        monkeypatch.setattr(auto_switch, "_hot_reload_subscription_token", _hot)
+
+        async def _lock(name):
+            return asyncio.Lock()
+
+        monkeypatch.setattr(auto_switch, "agent_switch_lock", _lock)
+
+        results = await auto_switch.reload_subscription_for_all_agents("sub-a")
+
+        assert seen == ["a1", "a2", "a3"]  # all attempted, fan-out not aborted
+        assert results["a1"] == "hot_reloaded"
+        assert results["a3"] == "hot_reloaded"
+        assert results["a2"].startswith("failed:")
+
+    @pytest.mark.asyncio
+    async def test_fan_out_no_agents_is_noop(self, auto_switch, monkeypatch):
+        auto_switch._stub_db.get_agents_by_subscription.return_value = []
+
+        async def _hot(name):
+            raise AssertionError("must not be called when no agents are assigned")
+
+        monkeypatch.setattr(auto_switch, "_hot_reload_subscription_token", _hot)
+
+        results = await auto_switch.reload_subscription_for_all_agents("sub-a")
+
+        assert results == {}
diff --git a/tests/unit/test_subscription_reassign_hotreload.py b/tests/unit/test_subscription_reassign_hotreload.py
new file mode 100644
index 00000000..432a90fc
--- /dev/null
+++ b/tests/unit/test_subscription_reassign_hotreload.py
@@ -0,0 +1,241 @@
+"""
+Unit tests for subscription hot-reload on the router paths (#1089).
+
+Two producer paths beyond the SUB-003 auto-switch:
+  - Manual reassignment (PUT /api/subscriptions/agents/{name}, T4): a sub→sub
+    swap hot-reloads the token in place (in-flight turns survive, no
+    container_stop) under the #799 per-agent lock; an auth-MODE change
+    (none/api-key → subscription) still recreates so ANTHROPIC_API_KEY is
+    dropped and the OAuth token is baked into Config.Env.
+  - Key rollover (POST /api/subscriptions upsert, T5): re-registering a
+    subscription's token fans a best-effort hot-reload out to every running
+    agent on that subscription; one agent's failure never fails the upsert nor
+    blocks the others.
+
+Module: src/backend/routers/subscriptions.py
+        src/backend/services/subscription_auto_switch.py
+
+NOTE: `routers.subscriptions` (and `db_models`) are imported lazily inside the
+fixtures rather than at module top. tests/unit/test_subscription_auto_switch_pingpong.py
+pops `utils` from sys.modules at collection time; importing the full `routers`
+package (which transitively needs `utils.url_validation`) at MODULE TOP here
+would then fail collection if that file is collected first. The conftest's
+autouse `_restore_unit_sys_modules` restores `utils` before each test runs, so
+a fixture-time (test-run) import is safe regardless of collection order.
+"""
+
+import asyncio
+import sys
+import types
+from unittest.mock import MagicMock
+
+import pytest
+
+
+@pytest.fixture
+def owner_user():
+    u = MagicMock()
+    u.username = "owner"
+    u.role = "user"
+    return u
+
+
+@pytest.fixture
+def admin_user():
+    u = MagicMock()
+    u.username = "admin"
+    u.role = "admin"
+    return u
+
+
+@pytest.fixture
+def manual_env(monkeypatch):
+    """Stub the db + the local-import targets the manual-reassign endpoint
+    reaches, and spy both the hot-reload helper and the recreate path so each
+    test can assert exactly which one ran."""
+    import routers.subscriptions as rs  # lazy: see module docstring
+
+    fake_db = MagicMock()
+    fake_db.can_user_share_agent.return_value = True
+    sub = MagicMock()
+    sub.id = "sub-b"
+    sub.name = "sub-B"
+    fake_db.get_subscription_by_name.return_value = sub
+    monkeypatch.setattr(rs, "db", fake_db)
+
+    container = object()
+    docker_service = types.ModuleType("services.docker_service")
+    docker_service.get_agent_container = lambda name: container
+    docker_service.get_agent_status_from_container = (
+        lambda c: types.SimpleNamespace(status="running")
+    )
+    monkeypatch.setitem(sys.modules, "services.docker_service", docker_service)
+
+    stop_calls: list = []
+    docker_utils = types.ModuleType("services.docker_utils")
+
+    async def _stop(c):
+        stop_calls.append(c)
+
+    docker_utils.container_stop = _stop
+    monkeypatch.setitem(sys.modules, "services.docker_utils", docker_utils)
+
+    start_calls: list = []
+    agent_service = types.ModuleType("services.agent_service")
+
+    async def _start(name):
+        start_calls.append(name)
+
+    agent_service.start_agent_internal = _start
+    monkeypatch.setitem(sys.modules, "services.agent_service", agent_service)
+
+    import services.subscription_auto_switch as auto_switch
+
+    hot_calls: list = []
+
+    async def _hot(name):
+        hot_calls.append(name)
+        return "hot_reloaded"
+
+    monkeypatch.setattr(auto_switch, "_hot_reload_subscription_token", _hot)
+
+    lock_acquired: list = []
+
+    async def _lock(name):
+        lock_acquired.append(name)
+        return asyncio.Lock()
+
+    monkeypatch.setattr(auto_switch, "agent_switch_lock", _lock)
+
+    return types.SimpleNamespace(
+        rs=rs,
+        db=fake_db,
+        container=container,
+        stop_calls=stop_calls,
+        start_calls=start_calls,
+        hot_calls=hot_calls,
+        lock_acquired=lock_acquired,
+    )
+
+
+class TestManualReassignHotReload:
+    """T4 — PUT /api/subscriptions/agents/{name}."""
+
+    @pytest.mark.asyncio
+    async def test_sub_to_sub_hot_reloads_no_container_stop(self, manual_env, owner_user):
+        """A sub→sub swap on a running agent hot-reloads the token under the
+        per-agent lock — the container is NOT stopped/recreated."""
+        manual_env.db.get_agent_subscription_id.return_value = "sub-a"  # already on a sub
+
+        result = await manual_env.rs.assign_subscription_to_agent(
+            agent_name="agent-x",
+            subscription_name="sub-B",
+            current_user=owner_user,
+        )
+
+        assert manual_env.hot_calls == ["agent-x"]  # hot-reload taken
+        assert manual_env.stop_calls == []  # NO container recreate
+        assert manual_env.start_calls == []
+        assert manual_env.lock_acquired == ["agent-x"]  # under the #799 lock
+        assert result["restart_result"] == "hot_reloaded"
+        # DB switched before applying the token
+        manual_env.db.assign_subscription_to_agent.assert_called_once_with("agent-x", "sub-b")
+
+    @pytest.mark.asyncio
+    async def test_mode_change_none_to_sub_still_recreates(self, manual_env, owner_user):
+        """An auth-mode change (no prior subscription → subscription) keeps the
+        recreate path so ANTHROPIC_API_KEY is dropped and the token is baked in."""
+        manual_env.db.get_agent_subscription_id.return_value = None  # api-key/none → sub
+
+        result = await manual_env.rs.assign_subscription_to_agent(
+            agent_name="agent-x",
+            subscription_name="sub-B",
+            current_user=owner_user,
+        )
+
+        assert manual_env.hot_calls == []  # hot-reload NOT taken
+        assert manual_env.stop_calls == [manual_env.container]  # recreate path
+        assert manual_env.start_calls == ["agent-x"]
+        assert manual_env.lock_acquired == ["agent-x"]  # still serialized
+        assert result["restart_result"] == "success"
+
+    @pytest.mark.asyncio
+    async def test_non_owner_rejected(self, manual_env, owner_user):
+        """Owner/admin gate is unchanged — a non-owner gets 403 before any switch."""
+        from fastapi import HTTPException
+
+        manual_env.db.can_user_share_agent.return_value = False
+
+        with pytest.raises(HTTPException) as exc:
+            await manual_env.rs.assign_subscription_to_agent(
+                agent_name="agent-x",
+                subscription_name="sub-B",
+                current_user=owner_user,
+            )
+        assert exc.value.status_code == 403
+        assert manual_env.hot_calls == []
+        assert manual_env.stop_calls == []
+
+
+@pytest.fixture
+def register_env(monkeypatch):
+    """Stub the db + the key-rollover fan-out for the register/upsert endpoint."""
+    import routers.subscriptions as rs  # lazy: see module docstring
+
+    fake_db = MagicMock()
+    fake_db.get_user_by_username.return_value = {"id": 1}
+    created = MagicMock()
+    created.id = "sub-x"
+    created.name = "sub-X"
+    fake_db.create_subscription.return_value = created
+    monkeypatch.setattr(rs, "db", fake_db)
+
+    # register_subscription 503s without an encryption key configured.
+    monkeypatch.setenv("CREDENTIAL_ENCRYPTION_KEY", "0" * 64)
+
+    import services.subscription_auto_switch as auto_switch
+
+    fanout_calls: list = []
+
+    async def _fanout(sub_id):
+        fanout_calls.append(sub_id)
+        return {}
+
+    monkeypatch.setattr(auto_switch, "reload_subscription_for_all_agents", _fanout)
+
+    return types.SimpleNamespace(
+        rs=rs, db=fake_db, created=created, fanout_calls=fanout_calls, auto_switch=auto_switch
+    )
+
+
+class TestRegisterKeyRollover:
+    """T5 / F1 — POST /api/subscriptions upsert fans a hot-reload out to every
+    running agent on that subscription, best-effort."""
+
+    @pytest.mark.asyncio
+    async def test_upsert_fans_out_hot_reload(self, register_env, admin_user):
+        from db_models import SubscriptionCredentialCreate
+
+        request = SubscriptionCredentialCreate(name="sub-X", token="sk-ant-oat01-rolled")
+
+        result = await register_env.rs.register_subscription(request, current_user=admin_user)
+
+        assert result is register_env.created
+        assert register_env.fanout_calls == ["sub-x"]  # fanned out to the upserted sub id
+
+    @pytest.mark.asyncio
+    async def test_fan_out_failure_does_not_fail_upsert(self, register_env, admin_user, monkeypatch):
+        """Best-effort: a fan-out blow-up is logged and swallowed — the upsert
+        still succeeds and returns the registered subscription."""
+        from db_models import SubscriptionCredentialCreate
+
+        async def _boom(sub_id):
+            raise RuntimeError("redis down")
+
+        monkeypatch.setattr(register_env.auto_switch, "reload_subscription_for_all_agents", _boom)
+
+        request = SubscriptionCredentialCreate(name="sub-X", token="sk-ant-oat01-rolled")
+
+        result = await register_env.rs.register_subscription(request, current_user=admin_user)
+
+        assert result is register_env.created  # upsert NOT failed by the fan-out error

From 2ccd65441e2c11aa02d08458a4d6469b6f0310d7 Mon Sep 17 00:00:00 2001
From: Andrii Pasternak <andriipasternak31@gmail.com>
Date: Sat, 13 Jun 2026 15:21:04 +0100
Subject: [PATCH 3/6] fix(agent-server): create oauth-token override atomically
 at 0600 (#1089)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

write_text()+chmod() created the override file under the process umask
(typically 0644) and left the token world-readable in the window between
create and the follow-up chmod. Use os.open(..., O_CREAT|O_TRUNC, 0o600)
so the file is created with 0600 in one syscall, and fchmod the fd as
well so a pre-existing override (older write path or tampering) is forced
back to 0600 — O_CREAT only applies its mode arg on creation.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../agent_server/routers/credentials.py       | 16 ++++++++++++----
 .../feature-flows/subscription-auto-switch.md |  7 +++++--
 tests/unit/test_reload_token_endpoint.py      | 19 +++++++++++++++++++
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/docker/base-image/agent_server/routers/credentials.py b/docker/base-image/agent_server/routers/credentials.py
index da00e18f..04e770a3 100644
--- a/docker/base-image/agent_server/routers/credentials.py
+++ b/docker/base-image/agent_server/routers/credentials.py
@@ -167,10 +167,18 @@ async def reload_subscription_token(request: TokenReloadRequest):
     if request.remove_api_key:
         os.environ.pop("ANTHROPIC_API_KEY", None)
 
-    # Persist to the writable-layer override (0600). Parent dir is created +
-    # chowned in the Dockerfile, so the agent (UID 1000) can write here.
-    _TOKEN_OVERRIDE.write_text(request.token)
-    _TOKEN_OVERRIDE.chmod(0o600)
+    # Persist to the writable-layer override. Parent dir is created + chowned in
+    # the Dockerfile, so the agent (UID 1000) can write here. Create the file
+    # atomically with 0600 via os.open() rather than write_text()+chmod(): the
+    # latter creates the file under the process umask (typically 0644) and leaves
+    # it world-readable until the follow-up chmod, a brief but avoidable window.
+    # The mode arg only applies on *creation*, so also fchmod the fd — a
+    # pre-existing override (older write path / tampering) keeps its own perms
+    # through O_CREAT|O_TRUNC, and we must still force it back to 0600.
+    fd = os.open(_TOKEN_OVERRIDE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    with os.fdopen(fd, "w") as f:
+        os.fchmod(f.fileno(), 0o600)
+        f.write(request.token)
 
     # Add the new token to the log-redaction set (drops the old exact-match
     # value; OAuth tokens stay caught by the sk-ant value regex regardless).
diff --git a/docs/memory/feature-flows/subscription-auto-switch.md b/docs/memory/feature-flows/subscription-auto-switch.md
index d92fbdb3..3117d31c 100644
--- a/docs/memory/feature-flows/subscription-auto-switch.md
+++ b/docs/memory/feature-flows/subscription-auto-switch.md
@@ -51,7 +51,10 @@ longer kills every parallel execution (#1037). Falls back to the previous
 `_restart_agent` recreate on a 404 (old base image), transport failure, or a
 missing token. Durability across a plain restart is handled by the
 `/var/lib/trinity/oauth-token` writable-layer override that `startup.sh` reads
-before launching the agent server. Canonical home: architecture.md
+before launching the agent server. The override is created **atomically at mode
+`0600`** via `os.open(..., O_CREAT, 0o600)` — not `write_text()`+`chmod()`, which
+would leave the token file briefly world-readable under the process umask between
+create and chmod. Canonical home: architecture.md
 §"Subscription Token Rotation via Hot-Reload".
 
 ## Trigger Surface
@@ -86,7 +89,7 @@ import from `backend.services`. Keep the two in sync when editing either.
 | Tests | `tests/test_subscription_auto_switch.py` | Smoke tests |
 | Tests | `tests/unit/test_subscription_auto_switch_pingpong.py` | Unit regression for #444 ping-pong prevention; `TestRateLimitAging` (#476) pins 2h-window correctness; `TestHotReloadSwitch` + `TestKeyRolloverFanOut` (#1089) pin the hot-reload helper, auto-switch wire-in, and key-rollover fan-out |
 | Tests | `tests/unit/test_subscription_reassign_hotreload.py` | #1089 — manual sub→sub hot-reload under the lock (no `container_stop`), mode-change still recreates, register/upsert key-rollover fan-out |
-| Tests | `tests/unit/test_reload_token_endpoint.py` | #1089 — agent-server `POST /api/credentials/reload-token`: sets env, writes the `/var/lib/trinity/oauth-token` override (0600), no `.env` write, `remove_api_key` pops `ANTHROPIC_API_KEY`, empty token → 400 |
+| Tests | `tests/unit/test_reload_token_endpoint.py` | #1089 — agent-server `POST /api/credentials/reload-token`: sets env, atomically writes the `/var/lib/trinity/oauth-token` override at `0600`, no `.env` write, `remove_api_key` pops `ANTHROPIC_API_KEY`, empty token → 400 |
 | Tests | `tests/unit/test_subscription_auto_switch_no_cred_import.py` | Chain-level regression for #606 — pins `_restart_agent → start_agent_internal → inject_assigned_credentials` reaches the `lifecycle.py:155` `subscription_mode` short-circuit and never re-enters file-based credential import |
 | Tests | `tests/unit/test_iso_cutoff.py` | Format parity between `iso_cutoff(N)` and `utc_now_iso()` (#476) |
 | Util | `src/backend/utils/helpers.py::iso_cutoff` | Canonical cutoff helper for ISO-Z TEXT comparisons (#476) |
diff --git a/tests/unit/test_reload_token_endpoint.py b/tests/unit/test_reload_token_endpoint.py
index 3f0fd6ca..22fa8368 100644
--- a/tests/unit/test_reload_token_endpoint.py
+++ b/tests/unit/test_reload_token_endpoint.py
@@ -86,6 +86,25 @@ def test_reload_sets_env_and_writes_durable_override(client):
     assert client._inject_calls == []
 
 
+def test_override_retightened_when_preexisting_world_readable(client):
+    """#1089 hardening: if the override already exists with loose perms (e.g.
+    0644 left by an older write path or tampering), a reload re-tightens it to
+    0600. ``os.open(..., 0o600)`` only applies its mode on *creation* — for an
+    existing file the mode arg is ignored — so the atomic create is paired with
+    an fchmod to enforce 0600 on the existing fd too (the old write_text()+chmod()
+    always re-tightened; the os.open() refinement must not silently lose that)."""
+    client._override.write_text("stale-token")
+    client._override.chmod(0o644)
+
+    resp = client.post(
+        "/api/credentials/reload-token", json={"token": "sk-ant-oat01-retighten"}
+    )
+
+    assert resp.status_code == 200
+    assert client._override.read_text() == "sk-ant-oat01-retighten"
+    assert stat.S_IMODE(client._override.stat().st_mode) == 0o600
+
+
 def test_reload_does_not_write_env_or_other_files(client):
     """The endpoint writes ONLY the override — no sibling .env / .mcp.json
     (proves it is not reusing the destructive /update or /inject flow)."""

From ee353a4080a2b986ee58de190a43ca60d5e56176 Mon Sep 17 00:00:00 2001
From: Andrii Pasternak <andriipasternak31@gmail.com>
Date: Sat, 13 Jun 2026 15:22:02 +0100
Subject: [PATCH 4/6] fix(subscriptions): snapshot old_sub_id inside the switch
 lock (#1089)

The agent's current subscription was read BEFORE entering agent_switch_lock,
so a concurrent auto-switch could change the assignment between that read
and the in-lock DB assign (TOCTOU). The stale old_sub_id could then
mis-classify a sub->sub swap as an auth-mode change (or vice-versa) and
route it into a needless container recreate instead of a hot-reload.
Move the get_agent_subscription_id() snapshot inside the lock, immediately
before the assign.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../feature-flows/subscription-auto-switch.md |  2 +-
 src/backend/routers/subscriptions.py          | 12 +++---
 .../test_subscription_reassign_hotreload.py   | 41 +++++++++++++++++++
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/docs/memory/feature-flows/subscription-auto-switch.md b/docs/memory/feature-flows/subscription-auto-switch.md
index 3117d31c..898be32b 100644
--- a/docs/memory/feature-flows/subscription-auto-switch.md
+++ b/docs/memory/feature-flows/subscription-auto-switch.md
@@ -157,5 +157,5 @@ anyway, so the omission was silent.
 - **All subscriptions exhausted**: No switch, error surfaces as normal 429/503. `_perform_auto_switch` does **not** clear rate-limit events for the old subscription — those events are the signal that keeps `is_subscription_rate_limited()` truthful, so the just-drained sub is not offered as a candidate on the next cycle (issue #444).
 - **API key agents**: Auto-switch only applies to subscription-based agents
 - **Flip-flopping** (#441 update): the 2h skip-list (`is_subscription_rate_limited` ∧ `select_best_alternative_subscription`) is now the only thrash guard. Pre-#441 the threshold also required 2 consecutive 429s before switching, but that gated user-visible failures unnecessarily — the skip-list alone is sufficient because a just-drained sub stays flagged for 2h post-switch.
-- **Concurrent switches**: SQLite serialization prevents races
+- **Concurrent switches** (#799/#1089): a per-agent `agent_switch_lock` serializes the assign+apply window so a manual `PUT /api/subscriptions/agents/{name}` reassignment can't interleave with a concurrent auto-switch. The `old_sub_id` snapshot is taken **inside** that lock, immediately before the DB assign — a concurrent switch therefore can't change the agent's subscription between the read and the assign (TOCTOU). Without this, a sub→sub swap could be mis-classified as an auth-mode change (or vice-versa) and routed into a needless container recreate instead of a hot-reload.
 - **Cleanup**: Records older than 24h are pruned hourly by `CleanupService` (phase 6, #476); the 2h "is rate-limited" window drives candidate filtering independently of cleanup
diff --git a/src/backend/routers/subscriptions.py b/src/backend/routers/subscriptions.py
index 4a1d908d..263359bb 100644
--- a/src/backend/routers/subscriptions.py
+++ b/src/backend/routers/subscriptions.py
@@ -262,11 +262,6 @@ async def assign_subscription_to_agent(
     if not subscription:
         raise HTTPException(status_code=404, detail=f"Subscription '{subscription_name}' not found")
 
-    # #1089: snapshot the agent's CURRENT subscription BEFORE reassigning. A
-    # sub→sub swap (old_sub_id is not None) can hot-reload the token in place; an
-    # auth-mode change (old_sub_id is None) still needs the container recreated.
-    old_sub_id = db.get_agent_subscription_id(agent_name)
-
     try:
         from services.subscription_auto_switch import (
             agent_switch_lock,
@@ -276,6 +271,13 @@ async def assign_subscription_to_agent(
         # #799/#1089: serialize the assign + apply window per agent so a manual
         # reassignment can't interleave with a concurrent auto-switch.
         async with await agent_switch_lock(agent_name):
+            # #1089: snapshot the agent's CURRENT subscription under the lock,
+            # before reassigning, so a concurrent auto-switch can't change it
+            # between the read and the assign (TOCTOU). A sub→sub swap
+            # (old_sub_id is not None) hot-reloads the token in place; an
+            # auth-mode change (old_sub_id is None) still needs the container
+            # recreated.
+            old_sub_id = db.get_agent_subscription_id(agent_name)
             db.assign_subscription_to_agent(agent_name, subscription.id)
 
             logger.info(
diff --git a/tests/unit/test_subscription_reassign_hotreload.py b/tests/unit/test_subscription_reassign_hotreload.py
index 432a90fc..85ebb68a 100644
--- a/tests/unit/test_subscription_reassign_hotreload.py
+++ b/tests/unit/test_subscription_reassign_hotreload.py
@@ -159,6 +159,47 @@ async def test_mode_change_none_to_sub_still_recreates(self, manual_env, owner_u
         assert manual_env.lock_acquired == ["agent-x"]  # still serialized
         assert result["restart_result"] == "success"
 
+    @pytest.mark.asyncio
+    async def test_old_sub_snapshot_read_under_lock(self, manual_env, owner_user, monkeypatch):
+        """#1089 TOCTOU: the agent's CURRENT subscription is snapshotted AFTER the
+        per-agent switch lock is entered, never before. Reading it outside the
+        lock lets a concurrent auto-switch change the assignment between the read
+        and the assign, so the recreate-vs-hot-reload branch could be chosen
+        against a stale `old_sub_id`."""
+        import services.subscription_auto_switch as auto_switch
+
+        order: list[str] = []
+
+        class _RecordingLock:
+            async def __aenter__(self):
+                order.append("lock_enter")
+                return self
+
+            async def __aexit__(self, *exc):
+                order.append("lock_exit")
+                return False
+
+        async def _lock(name):
+            return _RecordingLock()
+
+        monkeypatch.setattr(auto_switch, "agent_switch_lock", _lock)
+
+        def _read_sub(name):
+            order.append("read_sub")
+            return "sub-a"  # already on a sub → hot-reload branch
+
+        manual_env.db.get_agent_subscription_id.side_effect = _read_sub
+
+        await manual_env.rs.assign_subscription_to_agent(
+            agent_name="agent-x",
+            subscription_name="sub-B",
+            current_user=owner_user,
+        )
+
+        # the snapshot read happens strictly INSIDE the lock window
+        assert order == ["lock_enter", "read_sub", "lock_exit"]
+        assert manual_env.hot_calls == ["agent-x"]  # branch chosen off the in-lock read
+
     @pytest.mark.asyncio
     async def test_non_owner_rejected(self, manual_env, owner_user):
         """Owner/admin gate is unchanged — a non-owner gets 403 before any switch."""

From 6aa162bd275845548e10ac0909061cd3f1f2de20 Mon Sep 17 00:00:00 2001
From: Andrii Pasternak <andriipasternak31@gmail.com>
Date: Sat, 13 Jun 2026 15:22:45 +0100
Subject: [PATCH 5/6] test(subscriptions): pin admin-only gate on
 register_subscription (#1089)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a regression test asserting a non-admin caller gets 403 from
register_subscription before any subscription is created or the
key-rollover fan-out runs — mirroring the owner gate already covered on
the reassign path.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../feature-flows/subscription-auto-switch.md    |  2 +-
 .../unit/test_subscription_reassign_hotreload.py | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/docs/memory/feature-flows/subscription-auto-switch.md b/docs/memory/feature-flows/subscription-auto-switch.md
index 898be32b..8e416a08 100644
--- a/docs/memory/feature-flows/subscription-auto-switch.md
+++ b/docs/memory/feature-flows/subscription-auto-switch.md
@@ -88,7 +88,7 @@ import from `backend.services`. Keep the two in sync when editing either.
 | Frontend | `src/frontend/src/views/Settings.vue` | Toggle in Subscriptions section |
 | Tests | `tests/test_subscription_auto_switch.py` | Smoke tests |
 | Tests | `tests/unit/test_subscription_auto_switch_pingpong.py` | Unit regression for #444 ping-pong prevention; `TestRateLimitAging` (#476) pins 2h-window correctness; `TestHotReloadSwitch` + `TestKeyRolloverFanOut` (#1089) pin the hot-reload helper, auto-switch wire-in, and key-rollover fan-out |
-| Tests | `tests/unit/test_subscription_reassign_hotreload.py` | #1089 — manual sub→sub hot-reload under the lock (no `container_stop`), mode-change still recreates, register/upsert key-rollover fan-out |
+| Tests | `tests/unit/test_subscription_reassign_hotreload.py` | #1089 — manual sub→sub hot-reload under the lock (no `container_stop`), mode-change still recreates, register/upsert key-rollover fan-out, and the admin-only gate on `register_subscription` (non-admin → 403 before any create or fan-out) |
 | Tests | `tests/unit/test_reload_token_endpoint.py` | #1089 — agent-server `POST /api/credentials/reload-token`: sets env, atomically writes the `/var/lib/trinity/oauth-token` override at `0600`, no `.env` write, `remove_api_key` pops `ANTHROPIC_API_KEY`, empty token → 400 |
 | Tests | `tests/unit/test_subscription_auto_switch_no_cred_import.py` | Chain-level regression for #606 — pins `_restart_agent → start_agent_internal → inject_assigned_credentials` reaches the `lifecycle.py:155` `subscription_mode` short-circuit and never re-enters file-based credential import |
 | Tests | `tests/unit/test_iso_cutoff.py` | Format parity between `iso_cutoff(N)` and `utc_now_iso()` (#476) |
diff --git a/tests/unit/test_subscription_reassign_hotreload.py b/tests/unit/test_subscription_reassign_hotreload.py
index 85ebb68a..0f7cddd4 100644
--- a/tests/unit/test_subscription_reassign_hotreload.py
+++ b/tests/unit/test_subscription_reassign_hotreload.py
@@ -280,3 +280,19 @@ async def _boom(sub_id):
         result = await register_env.rs.register_subscription(request, current_user=admin_user)
 
         assert result is register_env.created  # upsert NOT failed by the fan-out error
+
+    @pytest.mark.asyncio
+    async def test_non_admin_rejected(self, register_env, owner_user):
+        """register_subscription is admin-only — a non-admin gets 403 before any
+        create or key-rollover fan-out (mirrors the owner gate on reassign)."""
+        from fastapi import HTTPException
+        from db_models import SubscriptionCredentialCreate
+
+        request = SubscriptionCredentialCreate(name="sub-X", token="sk-ant-oat01-rolled")
+
+        with pytest.raises(HTTPException) as exc:
+            await register_env.rs.register_subscription(request, current_user=owner_user)
+
+        assert exc.value.status_code == 403
+        assert register_env.fanout_calls == []  # never reached the rollover fan-out
+        register_env.db.create_subscription.assert_not_called()

From 108bcf347d003cc447a98d78d1b9e7daf98f4367 Mon Sep 17 00:00:00 2001
From: Andrii Pasternak <andriipasternak31@gmail.com>
Date: Wed, 17 Jun 2026 23:52:08 +0100
Subject: [PATCH 6/6] test(subscriptions): patch subscription_auto_switch via
 sys.modules to fix seed-flaky reassign tests (#1089)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The manual-reassign / key-rollover fixtures stubbed
services.subscription_auto_switch via `import ... as auto_switch`, which binds the
`services` package attribute. The endpoint resolves the helpers through a
function-local `from services.subscription_auto_switch import (...)`, which binds
from sys.modules. The conftest #762 autouse fixture restores
sys.modules["services"] before/after every test, and under some pytest-randomly
orderings (seed 99999) the package attribute and the sys.modules submodule entry
drift to two different module objects — so the fixture patched one object while
the endpoint called the other (the real, unpatched helper), and 4 tests flaked.

Resolve the module via sys.modules in the fixtures (new _live_auto_switch helper)
so the patch always lands on the object the endpoint calls. Verified: full
tests/unit suite green under pytest-randomly seeds 99999/12345/67890 (the 4
failures under 99999 are gone, no regression on the others).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../test_subscription_reassign_hotreload.py   | 27 ++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/tests/unit/test_subscription_reassign_hotreload.py b/tests/unit/test_subscription_reassign_hotreload.py
index 0f7cddd4..585c9849 100644
--- a/tests/unit/test_subscription_reassign_hotreload.py
+++ b/tests/unit/test_subscription_reassign_hotreload.py
@@ -32,6 +32,27 @@
 import pytest
 
 
+def _live_auto_switch():
+    """Return the live ``services.subscription_auto_switch`` object the endpoint
+    actually calls into, resolved via ``sys.modules`` — NOT the ``services``
+    package attribute.
+
+    ``routers/subscriptions.py`` reaches the hot-reload helpers through a
+    function-local ``from services.subscription_auto_switch import (...)``, which
+    binds from ``sys.modules["services.subscription_auto_switch"]``. A plain
+    ``import services.subscription_auto_switch as x`` instead binds ``x`` from the
+    ``services`` *package attribute*. The conftest's autouse #762 fixture restores
+    ``sys.modules["services"]`` before/after every test, and under some
+    pytest-randomly orderings (seed 99999) the package attribute and the
+    ``sys.modules`` submodule entry drift to two different module objects. Patching
+    the package-attribute object then misses the one the endpoint calls, so the
+    real helper runs and the test flakes (#1089). Resolving via ``sys.modules``
+    keeps the fixture's patch in lockstep with the endpoint regardless of order.
+    """
+    import services.subscription_auto_switch  # noqa: F401 — ensure it is imported
+    return sys.modules["services.subscription_auto_switch"]
+
+
 @pytest.fixture
 def owner_user():
     u = MagicMock()
@@ -89,7 +110,7 @@ async def _start(name):
     agent_service.start_agent_internal = _start
     monkeypatch.setitem(sys.modules, "services.agent_service", agent_service)
 
-    import services.subscription_auto_switch as auto_switch
+    auto_switch = _live_auto_switch()
 
     hot_calls: list = []
 
@@ -166,7 +187,7 @@ async def test_old_sub_snapshot_read_under_lock(self, manual_env, owner_user, mo
         lock lets a concurrent auto-switch change the assignment between the read
         and the assign, so the recreate-vs-hot-reload branch could be chosen
         against a stale `old_sub_id`."""
-        import services.subscription_auto_switch as auto_switch
+        auto_switch = _live_auto_switch()
 
         order: list[str] = []
 
@@ -234,7 +255,7 @@ def register_env(monkeypatch):
     # register_subscription 503s without an encryption key configured.
     monkeypatch.setenv("CREDENTIAL_ENCRYPTION_KEY", "0" * 64)
 
-    import services.subscription_auto_switch as auto_switch
+    auto_switch = _live_auto_switch()
 
     fanout_calls: list = []