From 1ad08d668a6561ad3ad95acaafa02961de27254d Mon Sep 17 00:00:00 2001
From: Justin Ramos <justin.ramos@gmail.com>
Date: Fri, 15 May 2026 08:51:00 -0600
Subject: [PATCH 1/5] chore: gitignore output/ run logs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The output/ directory holds per-run logs, evolved artifacts, and gate
decisions — all generated, none committable. Previous gitignore pattern
output/**/*.md only excluded markdown, leaving .log and .json files
showing as untracked in every git status, which has been quiet noise
across the recent provider work.
---
 .gitignore | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 974099aa..d5436f8b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,8 +15,8 @@ datasets/**/*.jsonl
 datasets/**/*.json
 !datasets/.gitkeep
 
-# Output files from run
-output/**/*.md
+# Output files from run (per-run logs, evolved artifacts, gate decisions)
+output/
 
 # Evolution snapshots
 snapshots/

From b2dfe239619b0fd4271b394d079733c2da4b3ba6 Mon Sep 17 00:00:00 2001
From: Justin Ramos <justin.ramos@gmail.com>
Date: Fri, 15 May 2026 08:57:38 -0600
Subject: [PATCH 2/5] feat(nous_lm): NousLM with OAuth refresh + agent_key
 minting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A user with provider: nous in ~/.hermes/config.yaml currently can't
actually use the framework — the resolver reads the OAuth access_token
from the credential pool and hands that to LiteLLM as the inference
Bearer, but Nous's inference endpoint requires the short-lived
agent_key (a separate credential minted via POST /api/oauth/agent-key).
This commit provides the LM subclass that handles the two-stage
credential model.

Mirrors hermes_cli/auth.py:3061-3193 (resolve_nous_runtime_credentials):

  * Refresh OAuth access_token in-memory when within 120s of expiry
    via POST {portal}/api/oauth/token (standard refresh_token grant
    with client_id="hermes-cli")
  * Mint a fresh agent_key when missing or within 120s of expiry via
    POST {portal}/api/oauth/agent-key (Bearer access_token, ask for
    1800s min TTL)
  * Refresh-first-then-mint sequencing so a stale access_token doesn't
    cause mint failures
  * Mint 401 → refresh OAuth once and retry mint (Hermes pattern)
  * Inference 401 → force re-mint and retry once (mid-run recovery)

Cross-instance state sharing via _SharedNousState keyed by initial
refresh_token. The four LM roles (optimizer, reflection, eval, judge)
share the lock + state, so a four-thread evolution doesn't trigger
four parallel refresh+mint round-trips (the refreshes would race the
portal's single-use refresh-token rotation and produce
refresh_token_reused errors on three of them).

In-memory only — no auth.json writeback. Long evolutions (>30 min on
a fresh agent_key) refresh + re-mint in-process; the on-disk store
stays at whatever `hermes model` last wrote. Avoids write-conflict
surface with concurrent Hermes sessions that may also be refreshing.

Error classification mirrors Hermes's own (auth.py:2595-2624):
invalid_grant/invalid_token + HTTP 401/403 from OAuth endpoint surface
HermesProviderError with `hermes model` recovery hint;
refresh_token_reused gets the special "another client consumed it"
message; mint failures translate similarly.

oauth_helpers.parse_iso_or_epoch handles both Nous's ISO 8601
expires_at and Codex's Unix epoch float — kept in a small standalone
module so the next OAuth provider has somewhere obvious to extend
without bloating either provider's LM file.
---
 evolution/core/nous_lm.py       | 480 +++++++++++++++++++++++++++++
 evolution/core/oauth_helpers.py |  49 +++
 tests/core/test_nous_lm.py      | 514 ++++++++++++++++++++++++++++++++
 3 files changed, 1043 insertions(+)
 create mode 100644 evolution/core/nous_lm.py
 create mode 100644 evolution/core/oauth_helpers.py
 create mode 100644 tests/core/test_nous_lm.py

diff --git a/evolution/core/nous_lm.py b/evolution/core/nous_lm.py
new file mode 100644
index 00000000..ec2df197
--- /dev/null
+++ b/evolution/core/nous_lm.py
@@ -0,0 +1,480 @@
+"""DSPy LM subclass for Nous Portal — OAuth refresh + agent_key minting.
+
+Nous Portal uses a two-stage credential model that's meaningfully different
+from Codex:
+
+  1. **OAuth access_token** (long-lived, days). Refreshable via the standard
+     refresh_token grant at ``{portal}/api/oauth/token``.
+  2. **agent_key** (short-lived, ~30 minutes). Minted from the access_token
+     by POSTing to ``{portal}/api/oauth/agent-key``. The inference endpoint
+     (``inference-api.nousresearch.com``) requires the **agent_key** as
+     Bearer — not the access_token.
+
+This module mirrors Hermes's own ``resolve_nous_runtime_credentials`` flow
+at ``hermes_cli/auth.py:3061-3193``: refresh the OAuth token first if
+expiring, then mint a fresh agent_key from it. On inference 401, force
+re-mint and retry once. State is shared across LM instances via
+``_STATE_BY_KEY`` so the four LM roles (optimizer, reflection, eval,
+judge) coordinate through one lock and one mint per refresh window —
+without this, four parallel workers entering the skew window would each
+mint and three would race the portal.
+
+In-memory only — no auth.json writeback. Long evolutions (>30 min on a
+fresh agent_key) refresh in-process, but the on-disk store stays at
+whatever ``hermes model`` last wrote. Avoids the write-conflict surface
+with concurrent Hermes sessions that may also be refreshing.
+"""
+
+from __future__ import annotations
+
+import os
+import threading
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+import dspy
+import httpx
+import litellm
+
+from evolution.core.hermes_provider import HermesProviderError
+from evolution.core.oauth_helpers import parse_iso_or_epoch
+
+
+# Mirrors hermes_cli/auth.py:67-72. Override via env vars when pointing at
+# a stage portal or local mock for tests. ``or`` (not a getenv default)
+# makes an empty variable fall back too; a trailing "/" is stripped.
+NOUS_PORTAL_BASE_URL = (
+    os.getenv("HERMES_PORTAL_BASE_URL") or "https://portal.nousresearch.com"
+).rstrip("/")
+NOUS_INFERENCE_BASE_URL = (
+    os.getenv("NOUS_INFERENCE_BASE_URL") or "https://inference-api.nousresearch.com/v1"
+).rstrip("/")
+NOUS_OAUTH_CLIENT_ID = "hermes-cli"
+
+# Refresh OAuth access tokens 2 minutes before they expire and re-mint
+# the inference agent_key 2 minutes before it expires. Hermes uses the
+# same constants at hermes_cli/auth.py:71-72; matching keeps multi-process
+# workloads from racing each other onto the wire.
+OAUTH_REFRESH_SKEW_SECONDS = 120
+AGENT_KEY_REFRESH_SKEW_SECONDS = 120
+# Ask the portal for at least 30 minutes of agent_key TTL on each mint;
+# the portal is free to grant more. Mirrors DEFAULT_AGENT_KEY_MIN_TTL.
+AGENT_KEY_MIN_TTL_SECONDS = 30 * 60
+
+
+@dataclass(repr=False)
+class _SharedNousState:
+    """OAuth + agent_key state shared across NousLM instances for the same
+    Nous account.
+
+    Keyed in ``_STATE_BY_KEY`` by the initial refresh_token observed at
+    construction, so a refresh or mint by any one NousLM is visible to the
+    others — without this, four parallel workers entering the skew window
+    simultaneously would each POST refresh+mint and three would receive
+    ``refresh_token_reused`` from the portal. ``repr=False`` keeps the live
+    tokens out of logs and tracebacks. Mutate only while holding ``lock``.
+    """
+
+    access_token: str
+    refresh_token: str
+    oauth_expires_at: Optional[float]
+    agent_key: Optional[str]
+    agent_key_expires_at: Optional[float]
+    lock: threading.Lock
+
+    def __deepcopy__(self, memo):
+        # NousLM uses dspy.LM.copy() (which deepcopies the whole instance)
+        # to apply role-specific kwargs. Locks aren't deep-copyable, and
+        # the *point* of shared state is to be shared: a copied NousLM must
+        # observe refreshes/mints performed against the original, so the
+        # copy keeps the very same _SharedNousState object.
+        return self
+
+
+_STATE_BY_KEY: Dict[str, _SharedNousState] = {}
+_STATE_REGISTRY_LOCK = threading.Lock()
+
+
+def _get_or_register_state(
+    *,
+    key: str,
+    access_token: str,
+    refresh_token: str,
+    oauth_expires_at: Optional[float],
+    agent_key: Optional[str],
+    agent_key_expires_at: Optional[float],
+) -> _SharedNousState:
+    """Return the shared state registered under ``key``, creating it from the
+    given credentials on first use. Later calls with the same key get the
+    existing object back; their credential arguments are ignored.
+    """
+    with _STATE_REGISTRY_LOCK:
+        state = _STATE_BY_KEY.get(key)
+        if state is None:
+            state = _STATE_BY_KEY[key] = _SharedNousState(
+                access_token=access_token,
+                refresh_token=refresh_token,
+                oauth_expires_at=oauth_expires_at,
+                agent_key=agent_key,
+                agent_key_expires_at=agent_key_expires_at,
+                lock=threading.Lock(),
+            )
+        return state
+
+
+def _reset_state_for_tests() -> None:
+    """Test-only hook: empty the module-level state registry so refreshes
+    and mints performed by one test are not visible to NousLM instances
+    created by the next.
+    """
+    with _STATE_REGISTRY_LOCK:
+        _STATE_BY_KEY.clear()
+
+
+class NousLM(dspy.LM):
+    """DSPy LM for Nous Portal — handles OAuth refresh + agent_key minting."""
+
+    def __init__(
+        self,
+        model: str,
+        *,
+        access_token: str,
+        refresh_token: str,
+        oauth_expires_at: Optional[float] = None,
+        agent_key: Optional[str] = None,
+        agent_key_expires_at: Optional[float] = None,
+        portal_base_url: Optional[str] = None,
+        inference_base_url: Optional[str] = None,
+        **kwargs: Any,
+    ) -> None:
+        """Point the LM at the inference endpoint, join (or create) the shared
+        credential state for this account, and mint an agent_key if needed.
+        Expiry arguments are Unix epoch seconds; extra kwargs go to dspy.LM.
+        """
+        kwargs["api_base"] = inference_base_url or NOUS_INFERENCE_BASE_URL
+        kwargs["api_key"] = agent_key or ""
+        super().__init__(model=model, **kwargs)
+        # No trailing "/" so the f"{base}/api/..." URLs never contain "//".
+        self._portal_base_url = (portal_base_url or NOUS_PORTAL_BASE_URL).rstrip("/")
+        # Shared-state key. Without a refresh_token (synthetic test creds)
+        # each instance gets a private key; id() alone is reused after garbage
+        # collection, so a monotonic timestamp is appended to keep it unique.
+        self._state_key = refresh_token or f"no-refresh:{id(self)}:{time.monotonic_ns()}"
+        self._shared_state = _get_or_register_state(
+            key=self._state_key,
+            access_token=access_token,
+            refresh_token=refresh_token,
+            oauth_expires_at=oauth_expires_at,
+            agent_key=agent_key,
+            agent_key_expires_at=agent_key_expires_at,
+        )
+        # Mints right away when agent_key is missing or inside the skew window.
+        self._ensure_credentials()
+
+    # ------------------------------------------------------------------
+    # Refresh + mint orchestration
+    # ------------------------------------------------------------------
+
+    def _oauth_needs_refresh(self) -> bool:
+        """Report whether the OAuth access_token is inside the skew window.
+
+        An unknown expiry (None) never triggers a speculative refresh; a
+        dead access_token surfaces as a 401 from the mint call instead.
+        """
+        expiry = self._shared_state.oauth_expires_at
+        threshold = time.time() + OAUTH_REFRESH_SKEW_SECONDS
+        return expiry is not None and threshold >= expiry
+
+    def _agent_key_needs_mint(self) -> bool:
+        """Report whether a fresh agent_key must be minted before inference.
+
+        True when no key is cached, when the cached key has no known expiry
+        (re-minting is cheaper than a mid-run 401), or when the key is
+        inside the refresh skew window.
+        """
+        state = self._shared_state
+        if not state.agent_key or state.agent_key_expires_at is None:
+            return True
+        return time.time() + AGENT_KEY_REFRESH_SKEW_SECONDS >= state.agent_key_expires_at
+
+    def _sync_from_shared_state(self) -> None:
+        """Copy the shared agent_key ("" when unset) into self.kwargs["api_key"]."""
+        self.kwargs["api_key"] = self._shared_state.agent_key or ""
+
+    def _ensure_credentials(self) -> None:
+        """Make sure self.kwargs carries a usable agent_key before a call.
+
+        Fast path: nothing is expiring, so only copy the shared key over.
+        Slow path: take the per-account lock and repeat both checks inside
+        it — another thread may have refreshed or minted while this one
+        waited — and hit the network only if still needed. The OAuth
+        refresh runs before the mint so the mint sees a current token.
+        """
+        if self._oauth_needs_refresh() or self._agent_key_needs_mint():
+            with self._shared_state.lock:
+                if self._oauth_needs_refresh():
+                    self._refresh_oauth()
+                if self._agent_key_needs_mint():
+                    self._mint_agent_key(allow_oauth_retry=True)
+                self._sync_from_shared_state()
+        else:
+            self._sync_from_shared_state()
+
+    def _force_remint(self) -> None:
+        """Re-mint after an inference 401, skipping the skew check — unless a
+        sibling already replaced the key this instance last sent; then adopt it.
+        """
+        with self._shared_state.lock:
+            if self.kwargs.get("api_key") == (self._shared_state.agent_key or ""):
+                self._mint_agent_key(allow_oauth_retry=True)
+            self._sync_from_shared_state()
+
+    # ------------------------------------------------------------------
+    # OAuth refresh
+    # ------------------------------------------------------------------
+
+    def _refresh_oauth(self) -> None:
+        """POST the refresh_token grant and store the result on shared state.
+        Raises HermesProviderError on any failure, before anything is mutated.
+        """
+        if not self._shared_state.refresh_token:
+            raise HermesProviderError(
+                "Nous Portal access token is expiring but no refresh_token "
+                "is available. Run `hermes model` and select Nous Portal "
+                "to re-authenticate."
+            )
+
+        try:
+            with httpx.Client(timeout=httpx.Timeout(20.0)) as client:
+                response = client.post(
+                    f"{self._portal_base_url}/api/oauth/token",
+                    headers={
+                        "Accept": "application/json",
+                        "Content-Type": "application/x-www-form-urlencoded",
+                    },
+                    data={
+                        "grant_type": "refresh_token",
+                        "client_id": NOUS_OAUTH_CLIENT_ID,
+                        "refresh_token": self._shared_state.refresh_token,
+                    },
+                )
+        except httpx.HTTPError as exc:
+            raise HermesProviderError(
+                f"Nous Portal OAuth refresh failed ({exc}). Check network "
+                f"connectivity, then re-try; if the failure persists, run "
+                f"`hermes model` to re-authenticate."
+            ) from exc
+        if response.status_code != 200:
+            raise HermesProviderError(_format_oauth_error(response))
+
+        try:
+            payload = response.json()
+        except ValueError as exc:
+            raise HermesProviderError(
+                "Nous Portal OAuth refresh returned invalid JSON. "
+                "Run `hermes model` to re-authenticate."
+            ) from exc
+        # Non-object JSON (null, a list, ...) has no fields; treat it as empty
+        # so it raises the missing-access_token error, not an AttributeError.
+        if not isinstance(payload, dict):
+            payload = {}
+
+        new_access = payload.get("access_token")
+        if not isinstance(new_access, str) or not new_access.strip():
+            raise HermesProviderError(
+                "Nous Portal OAuth refresh response was missing access_token. "
+                "Run `hermes model` to re-authenticate."
+            )
+
+        # Refresh tokens may rotate (single-use semantics). Honor the new
+        # one if present; missing means the portal kept the original valid.
+        new_refresh = payload.get("refresh_token")
+        if isinstance(new_refresh, str) and new_refresh.strip():
+            self._shared_state.refresh_token = new_refresh.strip()
+        expires_in = payload.get("expires_in")
+        if not (isinstance(expires_in, (int, float)) and expires_in > 0):
+            expires_in = 3600.0  # conservative fallback; avoids an immediate re-refresh
+        self._shared_state.oauth_expires_at = time.time() + float(expires_in)
+        self._shared_state.access_token = new_access.strip()
+
+    # ------------------------------------------------------------------
+    # Agent_key minting
+    # ------------------------------------------------------------------
+
+    def _mint_agent_key(self, *, allow_oauth_retry: bool) -> None:
+        """Mint a new agent_key using the current access_token as Bearer.
+
+        A 200 response is absorbed into shared state. A 401 while
+        ``allow_oauth_retry`` is True triggers one OAuth refresh followed by
+        a second mint attempt with retries disabled, so the recursion is at
+        most one level deep (the pattern at ``hermes_cli/auth.py:3122-3174``).
+        Every other outcome raises HermesProviderError.
+        """
+        bearer = f"Bearer {self._shared_state.access_token}"
+        try:
+            with httpx.Client(timeout=httpx.Timeout(20.0)) as client:
+                response = client.post(
+                    f"{self._portal_base_url}/api/oauth/agent-key",
+                    headers={
+                        "Accept": "application/json",
+                        "Content-Type": "application/json",
+                        "Authorization": bearer,
+                    },
+                    json={"min_ttl_seconds": AGENT_KEY_MIN_TTL_SECONDS},
+                )
+        except httpx.HTTPError as exc:
+            raise HermesProviderError(
+                f"Nous Portal agent-key mint failed ({exc}). Check network "
+                f"connectivity, then re-try; if the failure persists, run "
+                f"`hermes model` to re-authenticate."
+            ) from exc
+
+        status = response.status_code
+        if status == 200:
+            self._absorb_mint_response(response)
+        elif status == 401 and allow_oauth_retry:
+            # The access_token can be dead even though its recorded expiry
+            # looked fine: refresh once, then retry with retries disabled.
+            self._refresh_oauth()
+            self._mint_agent_key(allow_oauth_retry=False)
+        else:
+            raise HermesProviderError(_format_mint_error(response))
+
+    def _absorb_mint_response(self, response: httpx.Response) -> None:
+        """Store the key and expiry from a 200 mint response on shared state;
+        raises HermesProviderError (state untouched) if the body is unusable."""
+        try:
+            payload = response.json()
+        except ValueError as exc:
+            raise HermesProviderError(
+                "Nous Portal agent-key mint returned invalid JSON. "
+                "Run `hermes model` to re-authenticate."
+            ) from exc
+        # Non-object JSON (null, a list, ...) has no fields; treat as empty so
+        # it raises the missing-api_key error below, not an AttributeError.
+        if not isinstance(payload, dict):
+            payload = {}
+
+        # Accept ``api_key`` (current portal field) or ``agent_key`` (older shape).
+        agent_key = payload.get("api_key") or payload.get("agent_key")
+        if not isinstance(agent_key, str) or not agent_key.strip():
+            raise HermesProviderError(
+                "Nous Portal agent-key mint response was missing api_key. "
+                "Run `hermes model` to re-authenticate."
+            )
+
+        # Prefer absolute expires_at, then relative expires_in, then the floor TTL.
+        new_expires_at = parse_iso_or_epoch(payload.get("expires_at"))
+        if new_expires_at is None:
+            expires_in = payload.get("expires_in")
+            if not (isinstance(expires_in, (int, float)) and expires_in > 0):
+                expires_in = AGENT_KEY_MIN_TTL_SECONDS
+            new_expires_at = time.time() + float(expires_in)
+        self._shared_state.agent_key = agent_key.strip()
+        self._shared_state.agent_key_expires_at = new_expires_at
+
+    # ------------------------------------------------------------------
+    # forward / aforward — ensure creds, then delegate. Catch 401 once.
+    # ------------------------------------------------------------------
+
+    def forward(self, prompt=None, messages=None, **kwargs):  # type: ignore[override]
+        """Ensure credentials, call dspy.LM.forward, retry once after a 401."""
+        self._ensure_credentials()
+        try:
+            return super().forward(prompt=prompt, messages=messages, **kwargs)
+        except litellm.AuthenticationError:
+            # Key rejected despite the skew check: re-mint, retry once; a 2nd 401 propagates.
+            self._force_remint()
+            return super().forward(prompt=prompt, messages=messages, **kwargs)
+
+    async def aforward(self, prompt=None, messages=None, **kwargs):  # type: ignore[override]
+        """Async twin of forward; blocking credential I/O runs off the event loop."""
+        import asyncio  # local import: this module does not import it at top level
+        await asyncio.to_thread(self._ensure_credentials)
+        try:
+            return await super().aforward(prompt=prompt, messages=messages, **kwargs)
+        except litellm.AuthenticationError:
+            await asyncio.to_thread(self._force_remint)
+            return await super().aforward(prompt=prompt, messages=messages, **kwargs)
+
+
+# ----------------------------------------------------------------------
+# Error classification — mirror hermes-agent error taxonomy
+# ----------------------------------------------------------------------
+
+# OAuth error codes from the Nous portal's /api/oauth/token endpoint that
+# indicate a permanently invalid refresh token. User must re-authenticate.
+_OAUTH_RELOGIN_ERROR_CODES = frozenset({"invalid_grant", "invalid_token"})
+
+
+def _format_oauth_error(response: httpx.Response) -> str:
+    """Build the user-facing message for a non-200 OAuth refresh response.
+
+    Checked in order: a refresh token the portal reports as already used,
+    then errors that need a fresh login (relogin error codes or HTTP
+    401/403), then everything else, which gets a plain retry hint.
+    Mirrors hermes_cli/auth.py:2595-2624.
+    """
+    code, detail = _parse_error_body(response)
+    relogin_hint = "Run `hermes model` and select Nous Portal to re-authenticate."
+
+    if code == "refresh_token_reused" or "reuse" in detail.lower():
+        return (
+            "Nous Portal refresh token was already consumed by another "
+            "client (the portal enforces single-use refresh-token rotation). "
+            + relogin_hint
+        )
+
+    failure = f"Nous Portal OAuth refresh failed ({code}: {detail}). "
+    if code in _OAUTH_RELOGIN_ERROR_CODES or response.status_code in (401, 403):
+        return failure + relogin_hint
+
+    return failure + "Re-try; if the failure persists, run `hermes model`."
+
+
+def _format_mint_error(response: httpx.Response) -> str:
+    """Build the user-facing message for a failed agent-key mint.
+
+    HTTP 401/403 get the re-authenticate hint; any other status is reported
+    with its number and a retry hint.
+    """
+    code, detail = _parse_error_body(response)
+    status = response.status_code
+    if status not in (401, 403):
+        return (
+            f"Nous Portal agent-key mint failed (HTTP {status}, "
+            f"{code}: {detail}). Re-try; if the failure persists, run "
+            f"`hermes model`."
+        )
+    auth_failure = f"Nous Portal agent-key mint failed ({code}: {detail}). "
+    return auth_failure + "Run `hermes model` and select Nous Portal to re-authenticate."
+
+
+def _parse_error_body(response: httpx.Response) -> tuple[str, str]:
+    """Extract ``(code, detail)`` from an error response, best effort.
+
+    Understands the OpenAI shape ``{"error": {"code"|"type", "message"}}``
+    and the OAuth shape ``{"error": "...", "error_description"|"message"}``.
+    Anything missing falls back to ``"unknown"`` / ``"status <http status>"``.
+    """
+    code, detail = "unknown", f"status {response.status_code}"
+    try:
+        body = response.json()
+    except ValueError:
+        return code, detail
+
+    err = body.get("error") if isinstance(body, dict) else None
+    if isinstance(err, dict):
+        raw_code = err.get("code") or err.get("type")
+        raw_detail = err.get("message")
+    elif isinstance(err, str) and err.strip():
+        raw_code = err
+        raw_detail = body.get("error_description") or body.get("message")
+    else:
+        return code, detail
+    if isinstance(raw_code, str) and raw_code.strip():
+        code = raw_code.strip()
+    if isinstance(raw_detail, str) and raw_detail.strip():
+        detail = raw_detail.strip()
+    return code, detail
diff --git a/evolution/core/oauth_helpers.py b/evolution/core/oauth_helpers.py
new file mode 100644
index 00000000..7efbe5e6
--- /dev/null
+++ b/evolution/core/oauth_helpers.py
@@ -0,0 +1,49 @@
+"""Shared OAuth helpers used by Codex and Nous LM wrappers.
+
+Kept as a small standalone module so the next OAuth provider that needs
+in-memory refresh has somewhere obvious to drop shared utilities without
+bloating either provider's LM file.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Optional
+
+
+def parse_iso_or_epoch(value: Any) -> Optional[float]:
+    """Coerce an expires_at value into Unix epoch seconds.
+
+    Accepts the shapes different OAuth providers use for token expiry:
+
+      * ISO 8601 strings ("2026-05-15T10:30:00+00:00", or with a "Z"
+        suffix), as Nous Portal stores them. No offset is read as UTC.
+      * Unix epoch numbers or numeric strings ("1747299600"), as Codex does.
+      * None / empty, for older or hand-edited entries that omit it.
+
+    Returns the epoch as a float, or None when the value is missing,
+    malformed, a bool, or not finite. None means "expiry unknown"; each
+    caller decides what to do about that.
+    """
+    if isinstance(value, bool) or not isinstance(value, (int, float, str)):
+        return None  # bool is an int subclass but never a real timestamp
+    if isinstance(value, str):
+        s = value.strip()
+        if s.endswith("Z"):
+            # fromisoformat rejects a bare "Z" suffix before Python 3.11.
+            s = s[:-1] + "+00:00"
+        try:
+            parsed = datetime.fromisoformat(s)
+            if parsed.tzinfo is None:
+                # Naive → UTC; .timestamp() would apply the local zone.
+                return (parsed - datetime(1970, 1, 1)).total_seconds()
+            return parsed.timestamp()
+        except ValueError:
+            pass  # not ISO — fall through to the numeric-string form
+        try:
+            value = float(s)
+        except ValueError:
+            return None
+    epoch = float(value)
+    # NaN/inf would defeat every "is it expiring?" comparison downstream.
+    return epoch if abs(epoch) < float("inf") else None
diff --git a/tests/core/test_nous_lm.py b/tests/core/test_nous_lm.py
new file mode 100644
index 00000000..156b8b35
--- /dev/null
+++ b/tests/core/test_nous_lm.py
@@ -0,0 +1,514 @@
+"""Tests for the Nous Portal LM subclass.
+
+Covers:
+  * Construction wiring (inference_base_url, agent_key)
+  * Initial mint when agent_key missing or expiring
+  * OAuth refresh when access_token expiring
+  * Two-stage refresh-then-mint when both expiring
+  * Mint 401 → refresh + retry mint (Hermes pattern)
+  * Inference 401 → force re-mint and retry once
+  * Cross-instance state sharing (4 workers, 1 mint)
+  * Async path (aforward)
+  * Error classification: invalid_grant, refresh_token_reused
+"""
+
+from __future__ import annotations
+
+import asyncio
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor
+from unittest.mock import MagicMock, patch
+
+import httpx
+import litellm
+import pytest
+
+from evolution.core.hermes_provider import HermesProviderError
+from evolution.core.nous_lm import (
+    AGENT_KEY_MIN_TTL_SECONDS,
+    NOUS_OAUTH_CLIENT_ID,
+    NousLM,
+    _format_mint_error,
+    _format_oauth_error,
+    _reset_state_for_tests,
+)
+from evolution.core.oauth_helpers import parse_iso_or_epoch
+
+
+@pytest.fixture(autouse=True)
+def _clean_nous_state():
+    _reset_state_for_tests()
+    yield
+    _reset_state_for_tests()
+
+
+def _mock_response(*, status_code: int = 200, json_body: dict | None = None) -> MagicMock:
+    """Fake httpx.Response whose .json() raises ValueError without a body."""
+    if json_body is None:
+        json_stub = MagicMock(side_effect=ValueError("no body"))
+    else:
+        json_stub = MagicMock(return_value=json_body)
+    # Extra keyword arguments to MagicMock are set as attributes on the mock.
+    return MagicMock(spec=httpx.Response, status_code=status_code, json=json_stub)
+
+
+def _mock_httpx_post(responses: list):
+    """Return a fake httpx.Client (usable as a context manager) whose
+    successive .post() calls yield ``responses`` one at a time, so a test
+    can script a multi-step flow (refresh-then-mint, mint-401-refresh-retry).
+    """
+    fake_client = MagicMock()
+    fake_client.post.side_effect = responses
+    fake_client.__enter__.return_value = fake_client
+    return fake_client
+
+
+# ---------------------------------------------------------------------------
+# Construction
+# ---------------------------------------------------------------------------
+
+
+class TestNousLMConstruction:
+    def test_wires_inference_base_url_and_initial_agent_key(self):
+        # Pre-supplying a fresh agent_key should NOT trigger initial mint.
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth-tok",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 86400,  # not expiring
+                agent_key="initial-agent-key",
+                agent_key_expires_at=time.time() + 1800,  # not expiring
+                inference_base_url="https://test-inference/v1",
+            )
+            assert lm.kwargs["api_base"] == "https://test-inference/v1"
+            assert lm.kwargs["api_key"] == "initial-agent-key"
+            mock_cls.assert_not_called()
+
+    def test_falls_back_to_default_inference_base(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "minted", "expires_in": 1800})]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth-tok",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 86400,
+            )
+            assert "inference-api.nousresearch.com" in lm.kwargs["api_base"]
+
+
+# ---------------------------------------------------------------------------
+# Initial mint behavior
+# ---------------------------------------------------------------------------
+
+
+class TestInitialMint:
+    def test_mints_when_agent_key_missing(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "fresh-mint", "expires_in": 1800})]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth-tok",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 86400,
+                agent_key=None,
+            )
+            assert lm.kwargs["api_key"] == "fresh-mint"
+            # Verify the mint POST shape
+            client = mock_cls.return_value
+            assert client.post.call_count == 1
+            call = client.post.call_args
+            assert "/api/oauth/agent-key" in call.args[0]
+            assert call.kwargs["headers"]["Authorization"] == "Bearer oauth-tok"
+            assert call.kwargs["json"]["min_ttl_seconds"] == AGENT_KEY_MIN_TTL_SECONDS
+
+    def test_mints_when_agent_key_within_skew_window(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "fresh-mint", "expires_in": 1800})]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth-tok",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 86400,
+                agent_key="stale-key",
+                agent_key_expires_at=time.time() + 60,  # inside 120s skew
+            )
+            assert lm.kwargs["api_key"] == "fresh-mint"
+
+    def test_skips_mint_when_agent_key_fresh(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth-tok",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 86400,
+                agent_key="fresh-key",
+                agent_key_expires_at=time.time() + 1800,  # well outside skew
+            )
+            assert lm.kwargs["api_key"] == "fresh-key"
+            mock_cls.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# Two-stage refresh + mint
+# ---------------------------------------------------------------------------
+
+
+class TestTwoStageRefreshMint:
+    def test_oauth_expiring_refreshes_then_mints(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        json_body={
+                            "access_token": "refreshed-oauth",
+                            "expires_in": 86400,
+                        }
+                    ),
+                    _mock_response(
+                        json_body={"api_key": "post-refresh-mint", "expires_in": 1800}
+                    ),
+                ]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="stale-oauth",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 30,  # within 120s skew
+                agent_key=None,  # also needs mint
+            )
+            client = mock_cls.return_value
+            assert client.post.call_count == 2
+            # First call: OAuth refresh
+            first = client.post.call_args_list[0]
+            assert "/api/oauth/token" in first.args[0]
+            assert first.kwargs["data"]["grant_type"] == "refresh_token"
+            assert first.kwargs["data"]["client_id"] == NOUS_OAUTH_CLIENT_ID
+            # Second call: mint with the REFRESHED access_token
+            second = client.post.call_args_list[1]
+            assert "/api/oauth/agent-key" in second.args[0]
+            assert second.kwargs["headers"]["Authorization"] == "Bearer refreshed-oauth"
+            assert lm.kwargs["api_key"] == "post-refresh-mint"
+
+    def test_oauth_response_rotated_refresh_token_persisted(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        json_body={
+                            "access_token": "new-oauth",
+                            "refresh_token": "rotated-refresh",
+                            "expires_in": 86400,
+                        }
+                    ),
+                    _mock_response(
+                        json_body={"api_key": "minted", "expires_in": 1800}
+                    ),
+                ]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="stale-oauth",
+                refresh_token="original-refresh",
+                oauth_expires_at=time.time() + 30,
+            )
+            assert lm._shared_state.refresh_token == "rotated-refresh"
+
+
+# ---------------------------------------------------------------------------
+# Mint 401 → refresh OAuth + retry mint (Hermes pattern)
+# ---------------------------------------------------------------------------
+
+
+class TestMint401TriggersRefreshRetry:
+    def test_mint_401_refreshes_and_retries(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    # First mint attempt: 401
+                    _mock_response(status_code=401, json_body={"error": "invalid_token"}),
+                    # OAuth refresh succeeds
+                    _mock_response(
+                        json_body={"access_token": "refreshed", "expires_in": 86400}
+                    ),
+                    # Second mint attempt with refreshed access_token: success
+                    _mock_response(json_body={"api_key": "post-retry-mint", "expires_in": 1800}),
+                ]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="stale",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 86400,  # OAuth says "still valid"
+            )
+            client = mock_cls.return_value
+            assert client.post.call_count == 3
+            assert lm.kwargs["api_key"] == "post-retry-mint"
+
+    def test_mint_401_retry_also_fails_propagates(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(status_code=401, json_body={"error": "invalid_token"}),
+                    _mock_response(
+                        json_body={"access_token": "refreshed", "expires_in": 86400}
+                    ),
+                    # Retry mint also 401 — give up.
+                    _mock_response(status_code=401, json_body={"error": "invalid_token"}),
+                ]
+            )
+            with pytest.raises(HermesProviderError, match="hermes model"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="stale",
+                    refresh_token="refresh-tok",
+                    oauth_expires_at=time.time() + 86400,
+                )
+
+
+# ---------------------------------------------------------------------------
+# Inference 401 → force re-mint + retry once
+# ---------------------------------------------------------------------------
+
+
+class TestInferenceForceRemint:
+    def _build_lm_with_initial_mint(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "first-mint", "expires_in": 1800})]
+            )
+            return NousLM(
+                model="openai/test-model",
+                access_token="oauth-tok",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 86400,
+            )
+
+    def test_forward_recovers_from_401_with_remint_and_retry(self):
+        lm = self._build_lm_with_initial_mint()
+        # Now inference 401s once, then succeeds after re-mint.
+        with patch("dspy.LM.forward", autospec=True) as mock_super, \
+             patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_super.side_effect = [
+                litellm.AuthenticationError(
+                    message="401 Unauthorized",
+                    llm_provider="openai",
+                    model="openai/test-model",
+                ),
+                "ok",
+            ]
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "post-401-mint", "expires_in": 1800})]
+            )
+            result = lm.forward(messages=[{"role": "user", "content": "hi"}])
+            assert result == "ok"
+            assert mock_super.call_count == 2
+            # The cached agent_key was refreshed before the retry.
+            assert lm.kwargs["api_key"] == "post-401-mint"
+
+    def test_forward_propagates_second_401(self):
+        lm = self._build_lm_with_initial_mint()
+        with patch("dspy.LM.forward", autospec=True) as mock_super, \
+             patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            err = litellm.AuthenticationError(
+                message="401",
+                llm_provider="openai",
+                model="openai/test-model",
+            )
+            mock_super.side_effect = [err, err]
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "remint", "expires_in": 1800})]
+            )
+            with pytest.raises(litellm.AuthenticationError):
+                lm.forward(messages=[{"role": "user", "content": "hi"}])
+
+
+# ---------------------------------------------------------------------------
+# Cross-instance state sharing — concurrent mint race
+# ---------------------------------------------------------------------------
+
+
+class TestCrossInstanceSharing:
+    def test_concurrent_mint_only_posts_once(self):
+        """Four NousLM instances sharing the same refresh_token must
+        coordinate so only ONE actually POSTs to /api/oauth/agent-key.
+        Without shared state, three would race the portal.
+        """
+        post_count = {"n": 0}
+        post_lock = threading.Lock()
+
+        def slow_post(*args, **kwargs):
+            with post_lock:
+                post_count["n"] += 1
+                time.sleep(0.05)
+            return _mock_response(
+                json_body={"api_key": "concurrent-mint", "expires_in": 1800}
+            )
+
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            client = MagicMock()
+            client.__enter__.return_value = client
+            client.post.side_effect = slow_post
+            mock_cls.return_value = client
+
+            shared_args = dict(
+                model="openai/test-model",
+                access_token="oauth-tok",
+                refresh_token="shared-refresh",
+                oauth_expires_at=time.time() + 86400,
+                agent_key="stale-key",
+                agent_key_expires_at=time.time() + 60,
+            )
+            instances = [NousLM(**shared_args) for _ in range(4)]
+
+            with ThreadPoolExecutor(max_workers=4) as pool:
+                futs = [pool.submit(lm._ensure_credentials) for lm in instances]
+                for f in futs:
+                    f.result(timeout=10)
+
+            # Initial construction triggers one mint (skew check positive
+            # because agent_key_expires_at < now+120). _ensure_credentials
+            # called again on each of 4 threads should observe shared state
+            # and NOT mint again. So total POSTs = 1 (the constructor mint).
+            assert post_count["n"] == 1
+            for lm in instances:
+                assert lm.kwargs["api_key"] == "concurrent-mint"
+
+
+# ---------------------------------------------------------------------------
+# Async path
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncPath:
+    def test_aforward_invokes_ensure_credentials(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "minted", "expires_in": 1800})]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth",
+                refresh_token="refresh",
+                oauth_expires_at=time.time() + 86400,
+            )
+
+            with patch.object(NousLM, "_ensure_credentials") as mock_ensure, \
+                 patch("dspy.LM.aforward", autospec=True) as mock_super_aforward:
+                mock_super_aforward.return_value = asyncio.sleep(0, result="ok")
+
+                asyncio.run(
+                    lm.aforward(messages=[{"role": "user", "content": "hi"}])
+                )
+
+                mock_ensure.assert_called_once()
+                mock_super_aforward.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# Error classification
+# ---------------------------------------------------------------------------
+
+
+class TestErrorClassification:
+    def test_invalid_grant_surfaces_relogin_message(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    # OAuth refresh fails with invalid_grant.
+                    _mock_response(
+                        status_code=400,
+                        json_body={"error": "invalid_grant", "error_description": "bad"},
+                    ),
+                ]
+            )
+            with pytest.raises(HermesProviderError, match="hermes model"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="stale",
+                    refresh_token="bad-refresh",
+                    oauth_expires_at=time.time() + 30,  # forces refresh path
+                )
+
+    def test_refresh_token_reused_special_message(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        status_code=400,
+                        json_body={
+                            "error": {
+                                "code": "refresh_token_reused",
+                                "message": "Already consumed",
+                            }
+                        },
+                    )
+                ]
+            )
+            with pytest.raises(HermesProviderError) as excinfo:
+                NousLM(
+                    model="openai/test-model",
+                    access_token="stale",
+                    refresh_token="reused",
+                    oauth_expires_at=time.time() + 30,
+                )
+            msg = str(excinfo.value)
+            assert "another client" in msg
+            assert "hermes model" in msg
+
+    def test_format_oauth_error_handles_no_body(self):
+        resp = _mock_response(status_code=500, json_body=None)
+        msg = _format_oauth_error(resp)
+        assert "status 500" in msg
+
+    def test_format_mint_error_extracts_openai_shape(self):
+        resp = _mock_response(
+            status_code=403,
+            json_body={"error": {"code": "rate_limited", "message": "slow down"}},
+        )
+        msg = _format_mint_error(resp)
+        assert "rate_limited" in msg
+        assert "slow down" in msg
+        assert "hermes model" in msg
+
+
+# ---------------------------------------------------------------------------
+# parse_iso_or_epoch
+# ---------------------------------------------------------------------------
+
+
+class TestParseIsoOrEpoch:
+    def test_iso8601_with_offset(self):
+        result = parse_iso_or_epoch("2026-05-15T10:30:00+00:00")
+        assert result == 1778841000.0
+
+    def test_iso8601_with_z_suffix(self):
+        # OpenAI-shaped: trailing Z is shorthand for +00:00
+        result = parse_iso_or_epoch("2026-05-15T10:30:00Z")
+        assert result == 1778841000.0
+
+    def test_unix_epoch_float(self):
+        assert parse_iso_or_epoch(1779179400.0) == 1779179400.0
+
+    def test_unix_epoch_int(self):
+        assert parse_iso_or_epoch(1779179400) == 1779179400.0
+
+    def test_numeric_string(self):
+        assert parse_iso_or_epoch("1779179400") == 1779179400.0
+
+    def test_none_returns_none(self):
+        assert parse_iso_or_epoch(None) is None
+
+    def test_empty_string_returns_none(self):
+        assert parse_iso_or_epoch("") is None
+        assert parse_iso_or_epoch("   ") is None
+
+    def test_garbage_returns_none(self):
+        assert parse_iso_or_epoch("not-a-timestamp") is None

From df2b130114fa80911f14762c19c25388ab428534 Mon Sep 17 00:00:00 2001
From: Justin Ramos <justin.ramos@gmail.com>
Date: Fri, 15 May 2026 09:01:27 -0600
Subject: [PATCH 3/5] feat(hermes_provider): route provider: nous OAuth flow
 through NousLM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a new branch in resolve_default_lm: when canonical == "nous" AND
the auth.json pool entry has a refresh_token (signals OAuth-managed
flow that hermes model writes), build a ResolvedLM whose lm_factory
constructs a NousLM that handles the two-stage refresh + agent_key
mint internally.

Pool entries WITHOUT refresh_token (env-var-style NOUS_API_KEY users)
fall through to the existing OpenAI-wire direct-pass-through path
unchanged. Note that the direct-pass-through path probably also doesn't
work for Nous (the pool's access_token field holds the OAuth token,
not the agent_key the inference endpoint needs) — but that's a
pre-existing condition orthogonal to this PR. We don't try to "upgrade"
those users silently.

Missing pool entry → HermesProviderError pointing at `hermes model`
recovery rather than silent fall-through to the broken direct path.

instantiate_lm + the existing _probe_via_factory in auth_check (added
in PR #58) both already dispatch on lm_factory presence — Nous flows
through them unchanged. The Nous recovery hint is already in
_HERMES_AUTH_COMMAND_BY_PROVIDER from the original auth-check work.
---
 evolution/core/hermes_provider.py | 111 +++++++++++++
 tests/core/test_nous_provider.py  | 267 ++++++++++++++++++++++++++++++
 2 files changed, 378 insertions(+)
 create mode 100644 tests/core/test_nous_provider.py

diff --git a/evolution/core/hermes_provider.py b/evolution/core/hermes_provider.py
index f66d0652..8c7c66f9 100644
--- a/evolution/core/hermes_provider.py
+++ b/evolution/core/hermes_provider.py
@@ -312,6 +312,18 @@ def resolve_default_lm(
             auth_store=auth_store, target_model=target_model, role=role
         )
 
+    # Nous Portal: when the credential pool entry has a refresh_token (the
+    # OAuth-managed flow that hermes model writes), route through NousLM
+    # for in-memory OAuth refresh + agent_key minting. The plain env-var
+    # NOUS_API_KEY path falls through to the generic OpenAI-wire handler
+    # below — no behavior change for that simpler setup.
+    if canonical == "nous":
+        nous_resolved = _maybe_resolve_nous_lm(
+            auth_store=auth_store, target_model=target_model, role=role
+        )
+        if nous_resolved is not None:
+            return nous_resolved
+
     if not target_model:
         raise HermesProviderError(
             f"~/.hermes/config.yaml sets provider='{requested_provider}' "
@@ -703,6 +715,105 @@ def _factory() -> Any:
     )
 
 
+def _maybe_resolve_nous_lm(
+    *,
+    auth_store: Dict[str, Any],
+    target_model: str,
+    role: Role,
+) -> Optional[ResolvedLM]:
+    """Resolve ``provider: nous`` to a NousLM-backed ResolvedLM.
+
+    Nous uses a two-stage credential model: an OAuth access_token
+    (long-lived) is exchanged for a short-lived agent_key that's the
+    actual inference Bearer. NousLM handles both: refresh access_token
+    in-memory when expiring, mint a fresh agent_key from it, re-mint on
+    inference 401. See evolution/core/nous_lm.py.
+
+    Returns a ResolvedLM whose lm_factory builds the NousLM on demand,
+    or None when the pool entry has no refresh_token (env-var-style or
+    hand-edited) so the caller falls through to direct pass-through.
+
+    Raises HermesProviderError when there is no usable pool entry, when
+    an OAuth-managed entry lacks access_token, or when target_model is
+    empty. The NousLM import is lazy to avoid a circular dependency:
+    nous_lm imports HermesProviderError from this module.
+    """
+    pool_entry = _pick_pool_entry(auth_store, "nous")
+    if pool_entry is None:
+        # No pool entry at all → hint operator at the right recovery
+        # rather than falling through silently to env-var resolution
+        # that probably also won't work.
+        raise HermesProviderError(
+            "~/.hermes/config.yaml sets provider='nous' but no usable "
+            "entry was found in ~/.hermes/auth.json credential_pool[\"nous\"]. "
+            "Run `hermes model` and select Nous Portal to authenticate, "
+            f"or pass --{role}-model to bypass Hermes resolution."
+        )
+
+    refresh_token = _str_or_none(pool_entry.get("refresh_token"))
+    if not refresh_token:
+        # OAuth flow not in play; let the caller use the existing
+        # env-var/access_token-as-Bearer path. Note this path will still
+        # 401 against Nous's inference endpoint when the field actually
+        # holds the OAuth access_token (Nous needs the minted agent_key),
+        # but we can't tell from here whether the user intends OAuth or
+        # env-var, so the conservative posture is "don't change behavior".
+        return None
+
+    access_token = _str_or_none(pool_entry.get("access_token"))
+    if not access_token:
+        raise HermesProviderError(
+            "~/.hermes/auth.json credential_pool[\"nous\"] entry has no "
+            "access_token. Run `hermes model` and select Nous Portal to "
+            "re-authenticate."
+        )
+
+    if not target_model:
+        raise HermesProviderError(
+            "~/.hermes/config.yaml sets provider='nous' but model.default "
+            f"is empty. Set it (e.g., 'Hermes-4-405B'), or pass --{role}-model."
+        )
+
+    # Lazy import to break the circular dependency with nous_lm.
+    from evolution.core.nous_lm import (  # noqa: PLC0415
+        NousLM as _NousLM,
+        NOUS_INFERENCE_BASE_URL,
+        NOUS_PORTAL_BASE_URL,
+    )
+    from evolution.core.oauth_helpers import parse_iso_or_epoch  # noqa: PLC0415
+
+    inference_base_url = (
+        _str_or_none(pool_entry.get("inference_base_url"))
+        or _str_or_none(pool_entry.get("base_url"))
+        or NOUS_INFERENCE_BASE_URL
+    )
+    oauth_expires_at = parse_iso_or_epoch(pool_entry.get("expires_at"))
+    agent_key = _str_or_none(pool_entry.get("agent_key"))
+    agent_key_expires_at = parse_iso_or_epoch(pool_entry.get("agent_key_expires_at"))
+
+    def _factory() -> Any:
+        return _NousLM(
+            model=f"openai/{target_model}",
+            access_token=access_token,
+            refresh_token=refresh_token,
+            oauth_expires_at=oauth_expires_at,
+            agent_key=agent_key,
+            agent_key_expires_at=agent_key_expires_at,
+            portal_base_url=NOUS_PORTAL_BASE_URL,
+            inference_base_url=inference_base_url,
+        )
+
+    return ResolvedLM(
+        model=f"openai/{target_model}",
+        lm_kwargs={},
+        source=f"hermes-config:nous(inference_base_url={inference_base_url})",
+        lm_factory=_factory,
+        provider_hint="nous",
+    )
+
+
 def _build_resolved_lm(
     *,
     provider: str,
diff --git a/tests/core/test_nous_provider.py b/tests/core/test_nous_provider.py
new file mode 100644
index 00000000..1ee4ec6f
--- /dev/null
+++ b/tests/core/test_nous_provider.py
@@ -0,0 +1,267 @@
+"""Tests for Nous Portal resolution in the Hermes-aware LM resolver.
+
+The resolver routes ``provider: nous`` through ``_maybe_resolve_nous_lm``
+when the auth.json pool entry has a refresh_token (signals OAuth-managed
+flow). Pool entries without refresh_token (env-var-style) fall back to
+the existing direct-pass-through path so we don't break that simpler
+setup. Without a pool entry at all, the resolver fails with an
+actionable `hermes model` recovery hint rather than silently routing to
+something that won't work.
+"""
+
+from __future__ import annotations
+
+import json
+import textwrap
+import time
+from pathlib import Path
+
+import pytest
+
+from evolution.core.hermes_provider import (
+    HermesProviderError,
+    ResolvedLM,
+    resolve_default_lm,
+)
+from evolution.core.nous_lm import NousLM, _reset_state_for_tests
+
+
+@pytest.fixture(autouse=True)
+def _clean_nous_state():
+    _reset_state_for_tests()
+    yield
+    _reset_state_for_tests()
+
+
+@pytest.fixture
+def hermes_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+    home = tmp_path / "hermes_home"
+    home.mkdir()
+    for var in (
+        "OPENAI_API_KEY",
+        "ANTHROPIC_API_KEY",
+        "OPENROUTER_API_KEY",
+        "NOUS_API_KEY",
+    ):
+        monkeypatch.delenv(var, raising=False)
+    return home
+
+
+def _write_config(home: Path, body: str) -> None:
+    (home / "config.yaml").write_text(textwrap.dedent(body).lstrip())
+
+
+def _write_nous_pool(
+    home: Path,
+    *,
+    access_token: str = "oauth-tok",
+    refresh_token: str | None = "refresh-tok",
+    oauth_expires_at: str | None = "2099-12-01T00:00:00+00:00",
+    agent_key: str | None = None,
+    agent_key_expires_at: str | None = None,
+    inference_base_url: str | None = None,
+    extra: dict | None = None,
+) -> None:
+    """Write auth.json under ``home`` with one Nous credential-pool entry.
+
+    Arguments left as None are omitted from the entry; ``extra`` is merged
+    last. The default OAuth expiry is far-future so it never ages out.
+    """
+    entry: dict = {"access_token": access_token, "priority": 0}
+    optional = {
+        "refresh_token": refresh_token,
+        "expires_at": oauth_expires_at,
+        "agent_key": agent_key,
+        "agent_key_expires_at": agent_key_expires_at,
+        "inference_base_url": inference_base_url,
+    }
+    entry.update({k: v for k, v in optional.items() if v is not None})
+    if extra:
+        entry.update(extra)
+    (home / "auth.json").write_text(
+        json.dumps({"credential_pool": {"nous": [entry]}})
+    )
+
+
+# ---------------------------------------------------------------------------
+# OAuth-managed flow: pool entry has refresh_token → NousLM factory
+# ---------------------------------------------------------------------------
+
+
+class TestNousResolutionWithOAuth:
+    def test_oauth_pool_entry_returns_factory(self, hermes_home):
+        _write_config(
+            hermes_home,
+            """
+            model:
+              default: Hermes-4-405B
+              provider: nous
+            """,
+        )
+        # Seed a far-future agent_key so the factory's initial mint doesn't
+        # actually fire when the test calls factory() — keeps this test
+        # purely about the resolver's wiring.
+        _write_nous_pool(
+            hermes_home,
+            agent_key="fresh-key",
+            agent_key_expires_at="2099-12-01T00:00:00+00:00",
+        )
+        lm = resolve_default_lm(role="optimizer", hermes_home=hermes_home)
+        assert isinstance(lm, ResolvedLM)
+        assert lm.model == "openai/Hermes-4-405B"
+        assert lm.lm_kwargs == {}
+        assert lm.lm_factory is not None
+        assert lm.provider_hint == "nous"
+
+    def test_factory_constructs_nous_lm_with_oauth_state(self, hermes_home):
+        _write_config(
+            hermes_home,
+            """
+            model:
+              default: Hermes-4-405B
+              provider: nous
+            """,
+        )
+        _write_nous_pool(
+            hermes_home,
+            access_token="real-oauth-tok",
+            refresh_token="real-refresh-tok",
+            agent_key="initial-agent-key",
+            agent_key_expires_at="2099-12-01T00:00:00+00:00",
+        )
+        resolved = resolve_default_lm(role="optimizer", hermes_home=hermes_home)
+        nous_lm = resolved.lm_factory()
+        assert isinstance(nous_lm, NousLM)
+        assert nous_lm._shared_state.access_token == "real-oauth-tok"
+        assert nous_lm._shared_state.refresh_token == "real-refresh-tok"
+        assert nous_lm._shared_state.agent_key == "initial-agent-key"
+
+    def test_custom_inference_base_url_flows_through(self, hermes_home):
+        _write_config(
+            hermes_home,
+            """
+            model:
+              default: Hermes-4-405B
+              provider: nous
+            """,
+        )
+        _write_nous_pool(
+            hermes_home,
+            agent_key="fresh-key",
+            agent_key_expires_at="2099-12-01T00:00:00+00:00",
+            inference_base_url="https://custom-nous.example.com/v1",
+        )
+        resolved = resolve_default_lm(role="optimizer", hermes_home=hermes_home)
+        nous_lm = resolved.lm_factory()
+        assert nous_lm.kwargs["api_base"] == "https://custom-nous.example.com/v1"
+
+
+# ---------------------------------------------------------------------------
+# Fallback paths
+# ---------------------------------------------------------------------------
+
+
+class TestNousResolutionFallbacks:
+    def test_pool_entry_without_refresh_token_falls_back_to_direct(
+        self, hermes_home
+    ):
+        # Env-var-style: pool has only access_token, no refresh_token.
+        # The resolver must fall through to the existing OpenAI-wire
+        # direct-pass-through path so a NOUS_API_KEY user keeps working.
+        _write_config(
+            hermes_home,
+            """
+            model:
+              default: Hermes-4-405B
+              provider: nous
+            """,
+        )
+        _write_nous_pool(
+            hermes_home,
+            access_token="bare-api-key",
+            refresh_token=None,
+            oauth_expires_at=None,
+        )
+        resolved = resolve_default_lm(role="optimizer", hermes_home=hermes_home)
+        # Direct pass-through path: openai/<model>, api_base + api_key in lm_kwargs,
+        # no factory.
+        assert resolved.lm_factory is None
+        assert resolved.model == "openai/Hermes-4-405B"
+        assert resolved.lm_kwargs.get("api_key") == "bare-api-key"
+
+    def test_missing_pool_entry_surfaces_recovery_hint(self, hermes_home):
+        _write_config(
+            hermes_home,
+            """
+            model:
+              default: Hermes-4-405B
+              provider: nous
+            """,
+        )
+        # No auth.json written → no credential pool.
+        with pytest.raises(HermesProviderError, match="hermes model"):
+            resolve_default_lm(role="optimizer", hermes_home=hermes_home)
+
+    def test_empty_access_token_in_oauth_entry_surfaces_recovery(
+        self, hermes_home
+    ):
+        _write_config(
+            hermes_home,
+            """
+            model:
+              default: Hermes-4-405B
+              provider: nous
+            """,
+        )
+        # Has refresh_token (OAuth-managed signal) but no access_token.
+        _write_nous_pool(
+            hermes_home,
+            access_token="",
+            refresh_token="refresh-tok",
+        )
+        with pytest.raises(HermesProviderError, match="hermes model"):
+            resolve_default_lm(role="optimizer", hermes_home=hermes_home)
+
+    def test_no_model_default_for_oauth_path_surfaces_actionable(
+        self, hermes_home
+    ):
+        _write_config(
+            hermes_home,
+            """
+            model:
+              provider: nous
+            """,
+        )
+        _write_nous_pool(hermes_home)
+        with pytest.raises(HermesProviderError, match="model.default"):
+            resolve_default_lm(role="optimizer", hermes_home=hermes_home)
+
+
+# ---------------------------------------------------------------------------
+# Pool exhaustion regression
+# ---------------------------------------------------------------------------
+
+
+class TestPoolExhaustionRespected:
+    def test_exhausted_entry_skipped_with_future_reset(self, hermes_home):
+        # The existing _is_pool_entry_usable logic skips entries Hermes
+        # marked exhausted with a future cooldown. Confirm it still
+        # applies to the Nous OAuth path — should fall through to the
+        # missing-entry error.
+        _write_config(
+            hermes_home,
+            """
+            model:
+              default: Hermes-4-405B
+              provider: nous
+            """,
+        )
+        _write_nous_pool(
+            hermes_home,
+            extra={
+                "last_status": "exhausted",
+                "last_error_reset_at": time.time() + 3600,  # 1h in future
+            },
+        )
+        with pytest.raises(HermesProviderError, match="hermes model"):
+            resolve_default_lm(role="optimizer", hermes_home=hermes_home)

From e53a4459907ac587ae1ad043795b0f9ee02313d7 Mon Sep 17 00:00:00 2001
From: Justin Ramos <justin.ramos@gmail.com>
Date: Fri, 15 May 2026 09:09:05 -0600
Subject: [PATCH 4/5] docs+chore: Nous Portal section + manual smoke harness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

docs/model_resolution.md:
  * New "Nous Portal OAuth + agent_key" section paralleling the Codex
    section. Documents the two-stage credential model, the in-memory
    refresh + mint flow, the HERMES_PORTAL_BASE_URL override knob, the
    in-memory-only posture, and the env-var-fallthrough behavior for
    pool entries without a refresh_token.
  * Future-work list trimmed: removes the now-shipped non-Codex OAuth
    bullet; rewords what remains so it's clear which providers are
    intentionally out of scope (Qwen / Spotify / Gemini).

tests/manual/nous_smoke.py (NEW):
  Runnable mock-server smoke that validates the Nous wire flow without
  needing a real Nous Portal account. Spins up a stdlib http.server
  pretending to be portal.nousresearch.com, drives a real NousLM (and
  through it, a real LiteLLM call) against it, asserts on five
  scenarios:

    1. Initial mint: agent-key POST carries the seed access_token as
       Bearer; min_ttl_seconds=1800 in body.
    2. OAuth-expiring → refresh-then-mint: confirms call ordering and
       that the mint POST uses the REFRESHED access_token (proves the
       sequencing isn't backwards).
    3. Inference uses the minted agent_key: the inference POST's
       Authorization header is the MINTED key, not the OAuth token —
       this is the headline bug the whole PR fixes.
    4. Mid-run inference 401: forward's exception handler force-re-mints
       and retries; the recorded HTTP exchange shows mint→infer(401)→
       re-mint→infer(200) with two distinct minted keys.
    5. OAuth refresh invalid_grant → HermesProviderError with the
       `hermes model` recovery hint.

  Smoke uses cache=False + num_retries=0 to expose the underlying
  network behavior — DSPy cache would otherwise leak prior responses
  across scenarios and LiteLLM's internal retry-on-401 would mask our
  own re-mint logic. Comments explain both choices.

  Not part of CI (heavyweight; spins up a server). Documented in the
  Nous setup section as the recommended way to gain confidence in the
  Nous flow when a real Portal account isn't available.
---
 docs/model_resolution.md   |  32 ++-
 tests/manual/nous_smoke.py | 455 +++++++++++++++++++++++++++++++++++++
 2 files changed, 486 insertions(+), 1 deletion(-)
 create mode 100644 tests/manual/nous_smoke.py

diff --git a/docs/model_resolution.md b/docs/model_resolution.md
index c739f333..18277da1 100644
--- a/docs/model_resolution.md
+++ b/docs/model_resolution.md
@@ -140,6 +140,36 @@ Refresh is **in-memory only** — the framework does not write back to `~/.herme
 
 **What's not supported:** streaming via the Responses endpoint (evolution doesn't stream), Codex-specific reasoning-effort overrides (DSPy's defaults work for gpt-5-class), and tool-call message conversion beyond what DSPy's `_convert_chat_request_to_responses_request` already handles. If a Codex 401 surfaces during a run, the standard auth-error panel renders with the `hermes auth add openai-codex` recovery hint.
 
+## Nous Portal OAuth + agent_key
+
+Nous Portal uses a two-stage credential model that's different from every other provider:
+
+1. **OAuth access_token** (long-lived, days). Refreshable via the standard `refresh_token` grant.
+2. **agent_key** (short-lived, ~30 minutes). Minted from the access_token via a Nous-specific `POST /api/oauth/agent-key`. The inference endpoint requires the **agent_key** as Bearer — not the access_token.
+
+Run `hermes model` and select Nous Portal to populate `~/.hermes/auth.json` with both. Then point `config.yaml` at Nous:
+
+```yaml
+# ~/.hermes/config.yaml
+model:
+  default: Hermes-4-405B
+  provider: nous
+```
+
+When the resolver detects a Nous credential pool entry with a `refresh_token` (signals OAuth-managed flow), the framework instantiates a `NousLM` subclass that:
+
+1. **Mints a fresh agent_key at preflight time** by POSTing to `{portal}/api/oauth/agent-key` with the OAuth access_token as Bearer.
+2. **Refreshes the OAuth access_token in-memory** when it's within 120s of expiry — POSTed to `{portal}/api/oauth/token` with the standard refresh_token grant. Mirrors Hermes's own refresh-first-then-mint sequencing at `hermes_cli/auth.py:3061-3193`.
+3. **Re-mints on inference 401** (mid-run agent_key revocation or expiration). The four LM roles (optimizer, reflection, eval, judge) coordinate through a shared lock so a four-thread evolution doesn't race the portal's single-use refresh-token rotation.
+
+The portal URL is overridable via `HERMES_PORTAL_BASE_URL` (Hermes's own env var name; sharing keeps configs portable for stage / mock setups).
+
+Refresh + mint state is **in-memory only** — the framework never writes back to `~/.hermes/auth.json`. For evolution sessions running longer than the on-disk agent_key TTL (~30 minutes since the last `hermes model`), the in-process refresh handles it. For multi-day sessions, periodic `hermes model` keeps the on-disk store fresh.
+
+**What's not supported:** auxiliary endpoints (vision / web-extract / session-search models from `auxiliary.*` config), streaming, and `auth.json` writeback. Pool entries without `refresh_token` (env-var-style `NOUS_API_KEY` setups) fall through to the existing direct-pass-through path — note that path probably doesn't actually work for Nous inference (the access_token isn't a valid Bearer), but we don't try to "upgrade" those users silently.
+
+A runnable smoke harness at `tests/manual/nous_smoke.py` validates the Nous wire flow against a local mock portal (no real Nous Portal account required). Run via `uv run python tests/manual/nous_smoke.py`.
+
 ## Per-role overrides
 
 When your provider exposes multiple models, you can pick a different one per role to manage cost. Common pattern: a frontier model for the optimizer + reflection LMs (where reasoning matters), a cheaper model for eval + judge (where you'll make many calls):
@@ -236,7 +266,7 @@ The framework defaults all four roles to Hermes's single `model.default`. To use
 
 This module currently does not:
 
-- Refresh expired OAuth tokens for non-Codex providers (delegated to `hermes auth add <provider>` / `hermes model`; Codex tokens refresh in-memory — see [OpenAI Codex Responses API](#openai-codex-responses-api))
 - Honor `auxiliary.*` provider config from `config.yaml` (Hermes's vision/web-extract/session-search routing)
+- Refresh OAuth tokens for Qwen, Spotify, or Google Gemini providers (Codex and Nous Portal are refreshed in-memory — see their dedicated sections above; the other OAuth providers in Hermes don't have demand from the evolution use case yet)
 
 The slim resolver lives at `evolution/core/hermes_provider.py`. The mapping table is sourced from `hermes_cli/auth.py` constants — drift is possible; update by reference when Hermes adds providers.
diff --git a/tests/manual/nous_smoke.py b/tests/manual/nous_smoke.py
new file mode 100644
index 00000000..e52c1146
--- /dev/null
+++ b/tests/manual/nous_smoke.py
@@ -0,0 +1,455 @@
+"""Manual end-to-end smoke for the Nous Portal OAuth + agent_key flow.
+
+Why this exists:
+  We have unit tests for NousLM that mock httpx.Client at the Python
+  layer. Those catch shape bugs but don't validate that the real network
+  call we'd make against `portal.nousresearch.com` carries the right
+  headers, body, and bearer. The user has no Nous Portal account, so
+  we can't run a true end-to-end smoke against the real portal.
+
+What this script does:
+  Spin up a stdlib http.server on a random localhost port that pretends
+  to be the Nous portal. Routes:
+    POST /api/oauth/token       — refresh_token grant
+    POST /api/oauth/agent-key   — agent_key mint
+    POST /v1/chat/completions   — OpenAI-compat inference (so the actual
+                                  inference call returns 200 too)
+
+  Construct a NousLM directly (no resolver) pointed at the local server,
+  drive several scenarios (initial mint, OAuth refresh + mint, mid-run
+  401 recovery), and assert the recorded HTTP exchange matches expected
+  shape.
+
+How to run:
+  uv run python tests/manual/nous_smoke.py
+
+  Prints a recorded-requests summary for each scenario. Exits 0 when every
+  assertion passes and 1 otherwise. Not part of CI — heavyweight (spins up
+  a server) and not needed on every commit.
+"""
+
+from __future__ import annotations
+
+import json
+import sys
+import threading
+import time
+from collections import deque
+from datetime import datetime, timezone, timedelta
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from typing import Any, Dict, List, Optional
+
+import litellm
+
+# No env override (e.g. HERMES_PORTAL_BASE_URL) is set in this script:
+# _make_lm passes the portal and inference URLs to NousLM explicitly.
+# _PORT_HOLDER carries the mock server's bound port to main().
+import os
+_PORT_HOLDER = {"port": 0}
+
+
+# ---------------------------------------------------------------------------
+# Mock portal server
+# ---------------------------------------------------------------------------
+
+
+class _RecordingHandler(BaseHTTPRequestHandler):
+    """Routes the four endpoints we care about and records every request."""
+
+    recorded: deque = deque()
+    # Behavior knobs flipped per scenario from the main thread.
+    behavior: Dict[str, Any] = {
+        "refresh_status": 200,
+        "refresh_body": None,        # set per scenario
+        "mint_status": 200,
+        "mint_body": None,
+        "mint_call_count": 0,
+        "mint_first_status": None,   # 401 then 200, for the refresh-retry test
+        "infer_status": 200,
+        "infer_call_count": 0,
+        "infer_first_status": None,  # 401 then 200, for the inference-retry test
+    }
+
+    def log_message(self, format, *args):  # silence default access logs
+        pass
+
+    def _read_body(self) -> bytes:
+        length = int(self.headers.get("Content-Length", "0") or "0")
+        return self.rfile.read(length) if length > 0 else b""
+
+    def _record(self, body: bytes) -> Dict[str, Any]:
+        try:
+            parsed = json.loads(body) if body else {}
+        except json.JSONDecodeError:
+            parsed = body.decode("utf-8", errors="replace")
+        entry = {
+            "method": self.command,
+            "path": self.path,
+            "headers": {k: v for k, v in self.headers.items()},
+            "body": parsed,
+        }
+        self.recorded.append(entry)
+        return entry
+
+    def _respond(self, status: int, body: Dict[str, Any]) -> None:
+        payload = json.dumps(body).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(payload)))
+        self.end_headers()
+        self.wfile.write(payload)
+
+    def do_POST(self):  # noqa: N802 — http.server convention
+        body = self._read_body()
+        self._record(body)
+
+        if self.path.endswith("/api/oauth/token"):
+            self._respond(
+                self.behavior["refresh_status"],
+                self.behavior["refresh_body"]
+                or {
+                    "access_token": "REFRESHED-OAUTH",
+                    "refresh_token": "REFRESHED-REFRESH",
+                    "expires_in": 86400,
+                    "token_type": "Bearer",
+                },
+            )
+            return
+
+        if self.path.endswith("/api/oauth/agent-key"):
+            self.behavior["mint_call_count"] += 1
+            # First-call override (used to simulate "stale OAuth → mint 401 → refresh + retry")
+            if (
+                self.behavior["mint_first_status"] is not None
+                and self.behavior["mint_call_count"] == 1
+            ):
+                self._respond(
+                    self.behavior["mint_first_status"],
+                    {"error": "invalid_token"},
+                )
+                return
+            future = datetime.now(tz=timezone.utc) + timedelta(seconds=1800)
+            self._respond(
+                self.behavior["mint_status"],
+                self.behavior["mint_body"]
+                or {
+                    "api_key": f"MINTED-AGENT-KEY-{self.behavior['mint_call_count']}",
+                    "key_id": "test-key-id",
+                    "expires_at": future.strftime("%Y-%m-%dT%H:%M:%S+00:00"),
+                    "expires_in": 1800,
+                    "reused": False,
+                },
+            )
+            return
+
+        if "/chat/completions" in self.path:
+            self.behavior["infer_call_count"] += 1
+            if (
+                self.behavior["infer_first_status"] is not None
+                and self.behavior["infer_call_count"] == 1
+            ):
+                self._respond(
+                    self.behavior["infer_first_status"],
+                    {"error": {"code": "invalid_api_key", "message": "401"}},
+                )
+                return
+            self._respond(
+                self.behavior["infer_status"],
+                {
+                    "id": "chatcmpl-test",
+                    "object": "chat.completion",
+                    "created": int(time.time()),
+                    "model": "test-model",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "message": {"role": "assistant", "content": "OK"},
+                            "finish_reason": "stop",
+                        }
+                    ],
+                    "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
+                },
+            )
+            return
+
+        self._respond(404, {"error": "unknown_route", "path": self.path})
+
+
+# ---------------------------------------------------------------------------
+# Scenario harness
+# ---------------------------------------------------------------------------
+
+
+def _start_server() -> HTTPServer:
+    server = HTTPServer(("127.0.0.1", 0), _RecordingHandler)
+    _PORT_HOLDER["port"] = server.server_port
+    threading.Thread(target=server.serve_forever, daemon=True).start()
+    return server
+
+
+def _reset_recordings():
+    _RecordingHandler.recorded.clear()
+    _RecordingHandler.behavior.update(
+        {
+            "refresh_status": 200,
+            "refresh_body": None,
+            "mint_status": 200,
+            "mint_body": None,
+            "mint_call_count": 0,
+            "mint_first_status": None,
+            "infer_status": 200,
+            "infer_call_count": 0,
+            "infer_first_status": None,
+        }
+    )
+
+
+def _make_lm(*, port: int, **state):
+    """Construct a NousLM pointed at the local mock server."""
+    from evolution.core.nous_lm import NousLM, _reset_state_for_tests
+
+    _reset_state_for_tests()
+    base_url = f"http://127.0.0.1:{port}"
+    defaults = dict(
+        access_token="seed-oauth",
+        refresh_token="seed-refresh",
+        oauth_expires_at=time.time() + 86400,
+        agent_key=None,
+        agent_key_expires_at=None,
+        portal_base_url=base_url,
+        inference_base_url=f"{base_url}/v1",
+        # cache=False so each smoke scenario actually hits the wire — DSPy's
+        # response cache ignores api_key/api_base in the cache key, which
+        # would otherwise let one scenario's response leak into the next.
+        # num_retries=0 because LiteLLM's internal retry-on-401 would
+        # transparently recover before NousLM.forward's 401 handler sees
+        # the failure, masking our re-mint logic.
+        cache=False,
+        num_retries=0,
+    )
+    defaults.update(state)
+    return NousLM(model="openai/test-model", **defaults)
+
+
+def _summary(label: str) -> str:
+    lines = [f"\n=== {label} ==="]
+    for i, r in enumerate(_RecordingHandler.recorded):
+        auth = r["headers"].get("Authorization", "<none>")
+        body_preview = (
+            json.dumps(r["body"])[:80] if not isinstance(r["body"], str) else r["body"][:80]
+        )
+        lines.append(
+            f"  [{i}] {r['method']} {r['path']:30} auth={auth[:40]:40} body={body_preview}"
+        )
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Scenarios
+# ---------------------------------------------------------------------------
+
+
+def scenario_initial_mint(port: int) -> List[str]:
+    """Fresh construction with no agent_key → one mint POST, no refresh.
+
+    Returns the list of failed-assertion messages (empty on success). A
+    missing mint request is reported as a failure, not raised.
+    """
+    failures: List[str] = []
+    _reset_recordings()
+    lm = _make_lm(port=port)
+    recorded = list(_RecordingHandler.recorded)
+
+    mint_calls = _RecordingHandler.behavior["mint_call_count"]
+    if mint_calls != 1:
+        failures.append(f"expected 1 mint call, got {mint_calls}")
+    refresh_calls = sum(r["path"].endswith("/api/oauth/token") for r in recorded)
+    if refresh_calls != 0:
+        failures.append(f"OAuth refresh fired unnecessarily ({refresh_calls} call(s))")
+    # Default of None: a bare next() would raise StopIteration and abort the
+    # whole smoke run instead of reporting the missing mint as a failure.
+    mint = next((r for r in recorded if r["path"].endswith("/api/oauth/agent-key")), None)
+    if mint is None:
+        failures.append("no agent-key mint POST was recorded")
+    else:
+        auth = mint["headers"].get("Authorization")
+        if auth != "Bearer seed-oauth":
+            failures.append(f"mint POST should carry seed access_token as Bearer; got {auth}")
+        # A non-JSON body is recorded as a plain string, which has no .get().
+        body = mint["body"]
+        if not isinstance(body, dict) or body.get("min_ttl_seconds") != 1800:
+            failures.append(f"mint POST should request 1800s min TTL; got {body}")
+    api_key = str(lm.kwargs["api_key"])
+    if not api_key.startswith("MINTED-AGENT-KEY"):
+        failures.append(f"NousLM api_key should be the minted agent_key; got {api_key}")
+
+    print(_summary("initial_mint"))
+    return failures
+
+
+def scenario_oauth_expiring_refreshes_then_mints(port: int) -> List[str]:
+    """When OAuth is expiring AND no agent_key, expect refresh THEN mint
+    (in that order), with the mint POST using the refreshed access_token.
+    """
+    failures = []
+    _reset_recordings()
+    lm = _make_lm(port=port, oauth_expires_at=time.time() + 30)
+
+    paths = [r["path"] for r in _RecordingHandler.recorded]
+    if not paths or not paths[0].endswith("/api/oauth/token"):
+        failures.append(
+            f"expected OAuth refresh first; got call sequence {paths}"
+        )
+    if len(paths) < 2 or not paths[1].endswith("/api/oauth/agent-key"):
+        failures.append(
+            f"expected mint as second call; got call sequence {paths}"
+        )
+    if len(_RecordingHandler.recorded) >= 2:
+        mint_record = _RecordingHandler.recorded[1]
+        # The refresh response in our mock returns access_token=REFRESHED-OAUTH;
+        # the mint POST must use it as Bearer (proves the refresh-then-mint
+        # ordering wires correctly).
+        if mint_record["headers"].get("Authorization") != "Bearer REFRESHED-OAUTH":
+            failures.append(
+                f"mint POST should carry REFRESHED-OAUTH as Bearer; got "
+                f"{mint_record['headers'].get('Authorization')}"
+            )
+    # Refresh response should also have rotated the refresh_token in shared state.
+    if lm._shared_state.refresh_token != "REFRESHED-REFRESH":
+        failures.append(
+            f"rotated refresh_token should be persisted; got "
+            f"{lm._shared_state.refresh_token}"
+        )
+
+    print(_summary("oauth_expiring_refreshes_then_mints"))
+    return failures
+
+
+def scenario_inference_uses_minted_agent_key(port: int) -> List[str]:
+    """End-to-end: construct LM (mints), then make a real LiteLLM call.
+    The inference POST's Authorization header must be the MINTED agent_key
+    — proving we're not silently routing the OAuth access_token through
+    as the inference Bearer (the bug this whole PR fixes).
+    """
+    failures = []
+    _reset_recordings()
+    lm = _make_lm(port=port)
+
+    try:
+        lm(messages=[{"role": "user", "content": "hello"}])
+    except Exception as exc:
+        failures.append(f"inference call raised unexpectedly: {type(exc).__name__}: {exc}")
+
+    infer_records = [r for r in _RecordingHandler.recorded if "/chat/completions" in r["path"]]
+    if not infer_records:
+        failures.append("no inference call recorded")
+    else:
+        auth = infer_records[0]["headers"].get("Authorization", "")
+        if not auth.startswith("Bearer MINTED-AGENT-KEY"):
+            failures.append(
+                f"inference Bearer should be the minted agent_key; got {auth}"
+            )
+
+    print(_summary("inference_uses_minted_agent_key"))
+    return failures
+
+
+def scenario_inference_401_triggers_remint_and_retry(port: int) -> List[str]:
+    """Inference 401 (e.g., agent_key revoked mid-run) → force re-mint
+    via NousLM's forward 401 handler, then retry the inference once.
+    """
+    failures = []
+    _reset_recordings()
+    _RecordingHandler.behavior["infer_first_status"] = 401
+    lm = _make_lm(port=port)
+
+    try:
+        lm(messages=[{"role": "user", "content": "hello"}])
+    except Exception as exc:
+        failures.append(f"inference call raised after retry: {type(exc).__name__}: {exc}")
+
+    infer_count = sum(
+        1 for r in _RecordingHandler.recorded if "/chat/completions" in r["path"]
+    )
+    mint_count = sum(
+        1 for r in _RecordingHandler.recorded if r["path"].endswith("/api/oauth/agent-key")
+    )
+    # Expect: 1 initial mint (constructor), 1 first inference (401), 1 force re-mint, 1 retry inference (200)
+    if infer_count != 2:
+        failures.append(f"expected 2 inference calls (1 fail + 1 retry); got {infer_count}")
+    if mint_count != 2:
+        failures.append(f"expected 2 mint calls (initial + force re-mint); got {mint_count}")
+
+    print(_summary("inference_401_triggers_remint_and_retry"))
+    return failures
+
+
+def scenario_oauth_invalid_grant_surfaces_error(port: int) -> List[str]:
+    """Refresh failure with invalid_grant must raise HermesProviderError
+    pointing operator at `hermes model`.
+    """
+    from evolution.core.hermes_provider import HermesProviderError
+
+    failures = []
+    _reset_recordings()
+    _RecordingHandler.behavior["refresh_status"] = 400
+    _RecordingHandler.behavior["refresh_body"] = {
+        "error": "invalid_grant",
+        "error_description": "refresh token is no longer valid",
+    }
+
+    raised = None
+    try:
+        _make_lm(port=port, oauth_expires_at=time.time() + 30)
+    except HermesProviderError as exc:
+        raised = str(exc)
+
+    if raised is None:
+        failures.append("expected HermesProviderError; nothing raised")
+    elif "hermes model" not in raised:
+        failures.append(f"recovery hint missing from error: {raised}")
+
+    print(_summary("oauth_invalid_grant_surfaces_error"))
+    return failures
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+
+def main() -> int:
+    server = _start_server()
+    port = _PORT_HOLDER["port"]
+    print(f"Mock Nous Portal listening on http://127.0.0.1:{port}")
+
+    # Suppress LiteLLM background telemetry chatter that pollutes the smoke
+    # output without affecting wire-level behavior.
+    litellm.suppress_debug_info = True
+
+    all_failures: List[str] = []
+    for scenario in (
+        scenario_initial_mint,
+        scenario_oauth_expiring_refreshes_then_mints,
+        scenario_inference_uses_minted_agent_key,
+        scenario_inference_401_triggers_remint_and_retry,
+        scenario_oauth_invalid_grant_surfaces_error,
+    ):
+        failures = scenario(port)
+        for f in failures:
+            all_failures.append(f"{scenario.__name__}: {f}")
+
+    server.shutdown()
+
+    print("\n" + "=" * 60)
+    if all_failures:
+        print(f"FAIL: {len(all_failures)} assertion(s) failed:")
+        for f in all_failures:
+            print(f"  - {f}")
+        return 1
+    print("PASS: All assertions passed.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())

From c58dfa0c810d35abce10b700b13210e262994765 Mon Sep 17 00:00:00 2001
From: Justin Ramos <justin.ramos@gmail.com>
Date: Fri, 15 May 2026 09:44:38 -0600
Subject: [PATCH 5/5] fix(nous_lm): harden against silent failures surfaced in
 review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Review pass found a handful of silent-failure paths and one factually
wrong docstring; all addressed here.

oauth_helpers.parse_iso_or_epoch:
  Reject bool, inf, nan, negative, and naive-datetime inputs that would
  silently produce wrong epoch values:
    * inf/nan: every skew check evaluates as "now >= inf" → False, so
      the token would be treated as eternally fresh and never refreshed
    * naive ISO: datetime.timestamp() interprets in the host's local TZ,
      silently corrupting the skew window by hours on non-UTC hosts
    * bool: subclass of int, would coerce True → 1.0 epoch seconds
  Module docstring also rewritten to describe actual current consumer
  (NousLM only — the previous "shared by Codex" claim was aspirational;
  Codex parses expires_at inline as a raw float and doesn't import this).

nous_lm._refresh_oauth and _absorb_mint_response:
  expires_in fields now reject bool explicitly — without the guard,
  expires_in: True is accepted as 1 second (isinstance(True, int) is
  True in Python), triggering perpetual refresh / re-mint storms. Also:
  when no usable expiry is present (expires_in for the refresh
  response; both expires_at AND expires_in for the mint response), the
  code now logs a warning before falling back to the conservative
  default — 1 hour for the OAuth access_token, the 30-minute
  AGENT_KEY_MIN_TTL_SECONDS floor for the agent_key — so a portal
  protocol change that drops those fields is at least visible in the
  run log instead of silently caching a credential for the fallback
  duration.

nous_lm.NousLM.__init__:
  HERMES_PORTAL_BASE_URL and NOUS_INFERENCE_BASE_URL env vars are now
  read at instance time, not module-import time. Previously the docs
  advertised both as "overridable" but if anything imported nous_lm
  before the operator set the var, the override was silently ignored.
  The manual smoke harness already worked around this; this fix makes
  the documented behavior actually true.

nous_lm.NousLM.forward / aforward:
  When the post-401 retry ALSO returns 401, wrap with HermesProviderError
  that names the recovery action ("OAuth grant may have been revoked;
  run hermes model"). Previously a bare litellm.AuthenticationError
  propagated with no signal that recovery had been attempted.

nous_lm._SharedNousState.__post_init__:
  Reject construction with agent_key set but agent_key_expires_at None
  (or vice versa). The runtime path defensively treats this as "always
  re-mint" — surfacing the construction-time mistake loudly is cheaper
  than letting it cause silent re-mint storms in production.

hermes_provider._maybe_resolve_nous_lm:
  When the credential pool entry has access_token but neither
  refresh_token NOR agent_key, raise HermesProviderError pointing at
  `hermes model` recovery. This is almost certainly a partial OAuth
  setup (interrupted hermes model run) that would otherwise let the
  caller fall through to direct pass-through and 401 against Nous's
  inference endpoint with no breadcrumb. Pool entries with agent_key
  set still fall through unchanged (genuine inference-only credentials).

Comment-rot cleanup:
  Stripped six `hermes_cli/auth.py:NNNN-NNNN` line-number references
  from nous_lm.py and docs/model_resolution.md. Replaced with symbol
  references. Codex's nous_lm equivalent (codex_lm.py) already used
  symbol references; nous_lm.py had not followed that precedent, so
  this aligns the two.

New test coverage (~20 cases):
  Refresh + mint malformed-JSON paths, network-error wrapping (httpx
  ConnectError), OAuth 403 / mint 403 status-code-triggers-relogin,
  agent_key field-name alias, ISO expires_at parsing, bool expires_in
  hits floor, async-path 401 recovery (sync had it; async test
  previously mocked the thing under test), partial-OAuth-setup error
  in the resolver, _SharedNousState __post_init__ guard, and the new
  parse_iso_or_epoch rejection paths.
---
 docs/model_resolution.md          |   2 +-
 evolution/core/hermes_provider.py |  23 +-
 evolution/core/nous_lm.py         | 149 +++++++++----
 evolution/core/oauth_helpers.py   |  54 ++++-
 tests/core/test_nous_lm.py        | 334 +++++++++++++++++++++++++++++-
 tests/core/test_nous_provider.py  |  34 ++-
 6 files changed, 535 insertions(+), 61 deletions(-)

diff --git a/docs/model_resolution.md b/docs/model_resolution.md
index 18277da1..10823482 100644
--- a/docs/model_resolution.md
+++ b/docs/model_resolution.md
@@ -159,7 +159,7 @@ model:
 When the resolver detects a Nous credential pool entry with a `refresh_token` (signals OAuth-managed flow), the framework instantiates a `NousLM` subclass that:
 
 1. **Mints a fresh agent_key at preflight time** by POSTing to `{portal}/api/oauth/agent-key` with the OAuth access_token as Bearer.
-2. **Refreshes the OAuth access_token in-memory** when it's within 120s of expiry — POSTed to `{portal}/api/oauth/token` with the standard refresh_token grant. Mirrors Hermes's own refresh-first-then-mint sequencing at `hermes_cli/auth.py:3061-3193`.
+2. **Refreshes the OAuth access_token in-memory** when it's within 120s of expiry — POSTed to `{portal}/api/oauth/token` with the standard refresh_token grant. Mirrors Hermes's own refresh-first-then-mint sequencing in `hermes_cli/auth.py`.
 3. **Re-mints on inference 401** (mid-run agent_key revocation or expiration). The four LM roles (optimizer, reflection, eval, judge) coordinate through a shared lock so a four-thread evolution doesn't race the portal's single-use refresh-token rotation.
 
 The portal URL is overridable via `HERMES_PORTAL_BASE_URL` (Hermes's own env var name; sharing keeps configs portable for stage / mock setups).
diff --git a/evolution/core/hermes_provider.py b/evolution/core/hermes_provider.py
index 8c7c66f9..0a794dba 100644
--- a/evolution/core/hermes_provider.py
+++ b/evolution/core/hermes_provider.py
@@ -753,13 +753,24 @@ def _maybe_resolve_nous_lm(
         )
 
     refresh_token = _str_or_none(pool_entry.get("refresh_token"))
+    agent_key = _str_or_none(pool_entry.get("agent_key"))
     if not refresh_token:
-        # OAuth flow not in play; let the caller use the existing
-        # env-var/access_token-as-Bearer path. Note this path will still
-        # 401 against Nous's inference endpoint when the field actually
-        # holds the OAuth access_token (Nous needs the minted agent_key),
-        # but we can't tell from here whether the user intends OAuth or
-        # env-var, so the conservative posture is "don't change behavior".
+        # An entry with agent_key set is plausibly env-var-style or
+        # hand-edited inference-only — let it fall through to the
+        # generic OpenAI-wire handler with whatever Bearer it carries.
+        # An entry with NEITHER refresh_token NOR agent_key is almost
+        # certainly a partial OAuth setup (interrupted hermes model run,
+        # or the portal handed back access_token only). Inference would
+        # 401 with no breadcrumb pointing at the missing credentials, so
+        # raise here with a specific recovery hint.
+        if agent_key is None:
+            raise HermesProviderError(
+                "~/.hermes/auth.json credential_pool[\"nous\"] entry has "
+                "an access_token but no refresh_token or agent_key — "
+                "looks like a partial OAuth setup. Run `hermes model` "
+                "and select Nous Portal to complete authentication, or "
+                f"pass --{role}-model to bypass Hermes resolution."
+            )
         return None
 
     access_token = _str_or_none(pool_entry.get("access_token"))
diff --git a/evolution/core/nous_lm.py b/evolution/core/nous_lm.py
index ec2df197..8e7feca5 100644
--- a/evolution/core/nous_lm.py
+++ b/evolution/core/nous_lm.py
@@ -10,14 +10,14 @@
      (``inference-api.nousresearch.com``) requires the **agent_key** as
      Bearer — not the access_token.
 
-This module mirrors Hermes's own ``resolve_nous_runtime_credentials`` flow
-at ``hermes_cli/auth.py:3061-3193``: refresh the OAuth token first if
-expiring, then mint a fresh agent_key from it. On inference 401, force
-re-mint and retry once. State is shared across LM instances via
-``_STATE_BY_KEY`` so the four LM roles (optimizer, reflection, eval,
-judge) coordinate through one lock and one mint per refresh window —
-without this, four parallel workers entering the skew window would each
-mint and three would race the portal.
+Mirrors Hermes's ``resolve_nous_runtime_credentials`` flow in
+``hermes_cli/auth.py``: refresh the OAuth token first if expiring, then
+mint a fresh agent_key from it. On inference 401, force re-mint and retry
+once. State is shared across LM instances via ``_STATE_BY_KEY`` so the
+four LM roles (optimizer, reflection, eval, judge) coordinate through
+one lock and one mint per refresh window — without this, four parallel
+workers entering the skew window would each mint and three would race
+the portal's single-use refresh-token rotation.
 
 In-memory only — no auth.json writeback. Long evolutions (>30 min on a
 fresh agent_key) refresh in-process, but the on-disk store stays at
@@ -27,6 +27,7 @@
 
 from __future__ import annotations
 
+import logging
 import os
 import threading
 import time
@@ -40,26 +41,26 @@
 from evolution.core.hermes_provider import HermesProviderError
 from evolution.core.oauth_helpers import parse_iso_or_epoch
 
+_log = logging.getLogger(__name__)
 
-# Mirrors hermes_cli/auth.py:67-72 — reading the same constants keeps
-# us in lockstep with Hermes's own behavior. Override via env vars when
-# pointing at a stage portal or local mock for tests.
-NOUS_PORTAL_BASE_URL = os.getenv(
-    "HERMES_PORTAL_BASE_URL", "https://portal.nousresearch.com"
-)
-NOUS_INFERENCE_BASE_URL = os.getenv(
-    "NOUS_INFERENCE_BASE_URL", "https://inference-api.nousresearch.com/v1"
-)
+
+# Hardcoded defaults; the constructor reads ``HERMES_PORTAL_BASE_URL`` and
+# ``NOUS_INFERENCE_BASE_URL`` env vars at instance time so tests and stage
+# setups can override them post-import. Module-level capture would freeze
+# the values at first import, before any test or operator could intervene.
+NOUS_PORTAL_BASE_URL = "https://portal.nousresearch.com"
+NOUS_INFERENCE_BASE_URL = "https://inference-api.nousresearch.com/v1"
 NOUS_OAUTH_CLIENT_ID = "hermes-cli"
 
 # Refresh OAuth access tokens 2 minutes before they expire and re-mint
-# the inference agent_key 2 minutes before it expires. Hermes uses the
-# same constants at hermes_cli/auth.py:71-72; matching keeps multi-process
-# workloads from racing each other onto the wire.
+# the inference agent_key 2 minutes before it expires. Mirrors Hermes's
+# ``ACCESS_TOKEN_REFRESH_SKEW_SECONDS`` so multi-process workloads don't
+# race each other onto the wire on different cadences.
 OAUTH_REFRESH_SKEW_SECONDS = 120
 AGENT_KEY_REFRESH_SKEW_SECONDS = 120
 # Ask the portal for at least 30 minutes of agent_key TTL on each mint;
-# the portal is free to grant more. Mirrors DEFAULT_AGENT_KEY_MIN_TTL.
+# the portal is free to grant more. Mirrors Hermes's
+# ``DEFAULT_AGENT_KEY_MIN_TTL_SECONDS``.
 AGENT_KEY_MIN_TTL_SECONDS = 30 * 60
 
 
@@ -83,6 +84,19 @@ class _SharedNousState:
     agent_key_expires_at: Optional[float]
     lock: threading.Lock
 
+    def __post_init__(self) -> None:
+        # An agent_key without an expiry trips _agent_key_needs_mint into
+        # "always re-mint" mode, which is defensive but masks the
+        # construction-time mistake of seeding partial state. Pin the
+        # invariant so the failure surfaces loudly at construction.
+        if (self.agent_key and self.agent_key_expires_at is None) or (
+            self.agent_key_expires_at is not None and not self.agent_key
+        ):
+            raise ValueError(
+                "_SharedNousState: agent_key and agent_key_expires_at "
+                "must be set together (or both None)"
+            )
+
     def __deepcopy__(self, memo):
         # NousLM uses dspy.LM.copy() (which deepcopies the whole instance)
         # to apply role-specific kwargs. Locks aren't deep-copyable, and
@@ -148,12 +162,27 @@ def __init__(
         inference_base_url: Optional[str] = None,
         **kwargs: Any,
     ) -> None:
-        kwargs["api_base"] = inference_base_url or NOUS_INFERENCE_BASE_URL
+        # Resolve URLs at construction time (not module-import time) so
+        # tests and stage setups can override via env vars after the
+        # framework is loaded. ``HERMES_PORTAL_BASE_URL`` is Hermes's own
+        # variable name — sharing keeps a single ``export`` portable.
+        effective_portal = (
+            portal_base_url
+            or os.getenv("HERMES_PORTAL_BASE_URL")
+            or NOUS_PORTAL_BASE_URL
+        )
+        effective_inference = (
+            inference_base_url
+            or os.getenv("NOUS_INFERENCE_BASE_URL")
+            or NOUS_INFERENCE_BASE_URL
+        )
+
+        kwargs["api_base"] = effective_inference
         kwargs["api_key"] = agent_key or ""
 
         super().__init__(model=model, **kwargs)
 
-        self._portal_base_url = portal_base_url or NOUS_PORTAL_BASE_URL
+        self._portal_base_url = effective_portal
 
         # The lookup key for shared state — falls back to id(self) so test
         # scenarios with synthetic creds get per-instance isolation rather
@@ -282,18 +311,30 @@ def _refresh_oauth(self) -> None:
                 "Run `hermes model` to re-authenticate."
             )
 
-        # Refresh tokens may rotate (single-use semantics). Honor the new
-        # one if present; missing means the portal kept the original valid.
+        # The Nous portal enforces single-use refresh-token rotation;
+        # honor any rotated token in the response. Missing means the
+        # portal kept the original valid.
         new_refresh = payload.get("refresh_token")
         if isinstance(new_refresh, str) and new_refresh.strip():
             self._shared_state.refresh_token = new_refresh.strip()
 
         expires_in = payload.get("expires_in")
-        if isinstance(expires_in, (int, float)) and expires_in > 0:
+        if (
+            isinstance(expires_in, (int, float))
+            and not isinstance(expires_in, bool)
+            and expires_in > 0
+        ):
             self._shared_state.oauth_expires_at = time.time() + float(expires_in)
         else:
             # Conservative 1h fallback if the field is missing — keeps the
-            # next call from racing to the wire again immediately.
+            # next call from racing to the wire again immediately. Logged
+            # so a portal protocol change that drops expires_in is at
+            # least visible in the run log.
+            _log.warning(
+                "Nous OAuth refresh response had no usable expires_in; "
+                "using 1h fallback. payload keys: %s",
+                sorted(payload.keys()),
+            )
             self._shared_state.oauth_expires_at = time.time() + 3600.0
 
         self._shared_state.access_token = new_access.strip()
@@ -305,9 +346,9 @@ def _refresh_oauth(self) -> None:
     def _mint_agent_key(self, *, allow_oauth_retry: bool) -> None:
         """POST agent-key mint; on 401, optionally refresh OAuth and retry.
 
-        Mirrors Hermes's mint-401-triggers-refresh-retry pattern at
-        ``hermes_cli/auth.py:3122-3174``. ``allow_oauth_retry`` is True on
-        the first call from ``_ensure_credentials``; the recursive retry
+        Mirrors Hermes's mint-401-triggers-refresh-retry pattern in
+        ``hermes_cli/auth.py``. ``allow_oauth_retry`` is True on the
+        first call from ``_ensure_credentials``; the recursive retry
         passes False to bound the recursion at one OAuth refresh.
         """
         try:
@@ -365,10 +406,24 @@ def _absorb_mint_response(self, response: httpx.Response) -> None:
         new_expires_at = parse_iso_or_epoch(payload.get("expires_at"))
         if new_expires_at is None:
             expires_in = payload.get("expires_in")
-            if isinstance(expires_in, (int, float)) and expires_in > 0:
+            if (
+                isinstance(expires_in, (int, float))
+                and not isinstance(expires_in, bool)
+                and expires_in > 0
+            ):
                 new_expires_at = time.time() + float(expires_in)
             else:
-                # Conservative — assume the floor TTL we asked for.
+                # Conservative — assume the floor TTL we asked for. Log
+                # so a portal protocol change that drops both expiry
+                # fields is at least visible in the run log; otherwise
+                # we silently cache a key for 30 minutes regardless of
+                # what the server intended.
+                _log.warning(
+                    "Nous mint response had no usable expires_at or "
+                    "expires_in; using AGENT_KEY_MIN_TTL_SECONDS "
+                    "fallback. payload keys: %s",
+                    sorted(payload.keys()),
+                )
                 new_expires_at = time.time() + AGENT_KEY_MIN_TTL_SECONDS
 
         self._shared_state.agent_key = agent_key.strip()
@@ -385,10 +440,20 @@ def forward(self, prompt=None, messages=None, **kwargs):  # type: ignore[overrid
         except litellm.AuthenticationError:
             # Cached agent_key is dead despite passing the skew check.
             # Force re-mint (which may also refresh OAuth on its own 401)
-            # and retry once. A second 401 propagates so the auth-abort
-            # sentinel + cost-ceiling path catches it.
+            # and retry once. If the freshly-minted key is also rejected
+            # the OAuth grant has likely been revoked entirely; surface
+            # that explicitly so the operator gets the right recovery
+            # hint instead of a generic 401.
             self._force_remint()
-            return super().forward(prompt=prompt, messages=messages, **kwargs)
+            try:
+                return super().forward(prompt=prompt, messages=messages, **kwargs)
+            except litellm.AuthenticationError as exc:
+                raise HermesProviderError(
+                    "Nous Portal inference rejected a freshly-minted "
+                    "agent_key after an automatic re-mint. The OAuth "
+                    "grant may have been revoked. Run `hermes model` "
+                    "and select Nous Portal to re-authenticate."
+                ) from exc
 
     async def aforward(self, prompt=None, messages=None, **kwargs):  # type: ignore[override]
         self._ensure_credentials()
@@ -396,7 +461,17 @@ async def aforward(self, prompt=None, messages=None, **kwargs):  # type: ignore[
             return await super().aforward(prompt=prompt, messages=messages, **kwargs)
         except litellm.AuthenticationError:
             self._force_remint()
-            return await super().aforward(prompt=prompt, messages=messages, **kwargs)
+            try:
+                return await super().aforward(
+                    prompt=prompt, messages=messages, **kwargs
+                )
+            except litellm.AuthenticationError as exc:
+                raise HermesProviderError(
+                    "Nous Portal inference rejected a freshly-minted "
+                    "agent_key after an automatic re-mint. The OAuth "
+                    "grant may have been revoked. Run `hermes model` "
+                    "and select Nous Portal to re-authenticate."
+                ) from exc
 
 
 # ----------------------------------------------------------------------
@@ -410,7 +485,7 @@ async def aforward(self, prompt=None, messages=None, **kwargs):  # type: ignore[
 
 def _format_oauth_error(response: httpx.Response) -> str:
     """Translate a non-200 OAuth refresh response into an actionable user
-    message. Mirrors hermes_cli/auth.py:2595-2624.
+    message. Mirrors the OAuth-error classification in ``hermes_cli/auth.py``.
     """
     code, detail = _parse_error_body(response)
 
diff --git a/evolution/core/oauth_helpers.py b/evolution/core/oauth_helpers.py
index 7efbe5e6..8ac15fa2 100644
--- a/evolution/core/oauth_helpers.py
+++ b/evolution/core/oauth_helpers.py
@@ -1,12 +1,14 @@
-"""Shared OAuth helpers used by Codex and Nous LM wrappers.
+"""OAuth helper utilities for NousLM.
 
-Kept as a small standalone module so the next OAuth provider that needs
-in-memory refresh has somewhere obvious to drop shared utilities without
-bloating either provider's LM file.
+Currently only consumed by ``NousLM`` and ``_maybe_resolve_nous_lm``.
+Lives as its own module so the next OAuth provider that needs in-memory
+refresh has somewhere obvious to drop shared utilities without bloating
+either consumer's file.
 """
 
 from __future__ import annotations
 
+import math
 from datetime import datetime
 from typing import Any, Optional
 
@@ -22,13 +24,19 @@ def parse_iso_or_epoch(value: Any) -> Optional[float]:
       * Older or hand-edited entries may omit it entirely.
 
     Returns the equivalent Unix epoch float, or None when the value is
-    missing, malformed, or has no parseable shape. Callers treat None as
-    "unknown" — typically meaning "trigger a refresh" defensively.
+    missing, malformed, has no parseable shape, or fails sanity checks
+    (non-finite, negative, or naive datetime that would silently pull
+    in the host TZ instead of the intended UTC).
+
+    Callers treat None as "unknown" — typically meaning "trigger a
+    refresh" defensively.
     """
-    if value is None:
+    if value is None or isinstance(value, bool):
+        # bool is a subclass of int; reject before the numeric path so a
+        # stray ``True`` isn't silently turned into 1.0 epoch seconds.
         return None
     if isinstance(value, (int, float)):
-        return float(value)
+        return _validated(float(value))
     if isinstance(value, str):
         s = value.strip()
         if not s:
@@ -38,12 +46,36 @@ def parse_iso_or_epoch(value: Any) -> Optional[float]:
         if s.endswith("Z"):
             s = s[:-1] + "+00:00"
         try:
-            return datetime.fromisoformat(s).timestamp()
+            dt = datetime.fromisoformat(s)
         except ValueError:
-            pass
+            dt = None
+        if dt is not None:
+            if dt.tzinfo is None:
+                # Naive datetime: Python's .timestamp() would interpret
+                # in the host's local timezone, which silently corrupts
+                # the skew window by hours on non-UTC hosts. Reject so
+                # the caller treats it as "unknown" rather than producing
+                # a confidently-wrong epoch.
+                return None
+            return _validated(dt.timestamp())
         # Numeric-looking string ("1747299600") — treat as epoch seconds.
         try:
-            return float(s)
+            return _validated(float(s))
         except ValueError:
             return None
     return None
+
+
+def _validated(epoch: float) -> Optional[float]:
+    """Reject inf, nan, and negative epoch values.
+
+    ``inf`` would make every skew check evaluate ``something >= inf`` →
+    False, so the token would be treated as eternally fresh and never
+    refreshed (silent "wrong answer" failure). ``nan`` has the same
+    failure mode (all comparisons against nan are False). Negatives are
+    structurally absurd for an expires_at and most likely indicate a
+    parser bug upstream.
+    """
+    if not math.isfinite(epoch) or epoch < 0:
+        return None
+    return epoch
diff --git a/tests/core/test_nous_lm.py b/tests/core/test_nous_lm.py
index 156b8b35..25c16caa 100644
--- a/tests/core/test_nous_lm.py
+++ b/tests/core/test_nous_lm.py
@@ -18,7 +18,7 @@
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor
-from unittest.mock import MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import httpx
 import litellm
@@ -326,7 +326,11 @@ def test_forward_propagates_second_401(self):
             mock_cls.return_value = _mock_httpx_post(
                 [_mock_response(json_body={"api_key": "remint", "expires_in": 1800})]
             )
-            with pytest.raises(litellm.AuthenticationError):
+            # The second 401 (after re-mint) is wrapped as a
+            # HermesProviderError that names the recovery action ("OAuth
+            # grant may have been revoked"), so the operator gets a
+            # specific message instead of a bare 401.
+            with pytest.raises(HermesProviderError, match="re-mint"):
                 lm.forward(messages=[{"role": "user", "content": "hi"}])
 
 
@@ -484,6 +488,298 @@ def test_format_mint_error_extracts_openai_shape(self):
 # ---------------------------------------------------------------------------
 
 
+class TestResponseShapeEdgeCases:
+    """Coverage for protocol-rev edge cases that the unit tests originally
+    glossed over: malformed JSON, alternate field names, fallback TTLs,
+    and the bool-as-numeric trap.
+    """
+
+    def test_refresh_response_missing_access_token_raises(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        json_body={"refresh_token": "x", "expires_in": 3600}
+                    )
+                ]
+            )
+            with pytest.raises(HermesProviderError, match="missing access_token"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="stale",
+                    refresh_token="r",
+                    oauth_expires_at=time.time() + 30,  # forces refresh
+                )
+
+    def test_refresh_response_malformed_json_raises(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(status_code=200, json_body=None)]  # .json() raises
+            )
+            with pytest.raises(HermesProviderError, match="invalid JSON"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="stale",
+                    refresh_token="r",
+                    oauth_expires_at=time.time() + 30,
+                )
+
+    def test_mint_response_missing_api_key_raises(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"expires_in": 1800})]
+            )
+            with pytest.raises(HermesProviderError, match="missing api_key"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="oauth",
+                    refresh_token="r",
+                    oauth_expires_at=time.time() + 86400,
+                )
+
+    def test_mint_response_malformed_json_raises(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(status_code=200, json_body=None)]
+            )
+            with pytest.raises(HermesProviderError, match="invalid JSON"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="oauth",
+                    refresh_token="r",
+                    oauth_expires_at=time.time() + 86400,
+                )
+
+    def test_mint_response_uses_agent_key_field_alias(self):
+        # The portal historically used `agent_key`; current shape is
+        # `api_key`. Forward/back compat: either should work.
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        json_body={
+                            "agent_key": "minted-via-old-shape",
+                            "expires_in": 1800,
+                        }
+                    )
+                ]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth",
+                refresh_token="r",
+                oauth_expires_at=time.time() + 86400,
+            )
+            assert lm.kwargs["api_key"] == "minted-via-old-shape"
+
+    def test_mint_response_iso_expires_at_parsed(self):
+        # The current portal returns expires_at as ISO 8601; verify the
+        # parser flows through without falling to the expires_in branch.
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        json_body={
+                            "api_key": "minted",
+                            "expires_at": "2026-12-01T00:00:00+00:00",
+                            # expires_in omitted on purpose — exercises the
+                            # expires_at-only branch (no expires_in fallback)
+                        }
+                    )
+                ]
+            )
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth",
+                refresh_token="r",
+                oauth_expires_at=time.time() + 86400,
+            )
+            # 2026-12-01T00:00:00+00:00 → epoch 1796083200
+            assert lm._shared_state.agent_key_expires_at == 1796083200.0
+
+    def test_mint_response_bool_expires_in_falls_to_floor(self):
+        # isinstance(True, int) is True in Python; without explicit
+        # bool exclusion, True would be cached as a 1-second TTL,
+        # triggering perpetual re-mint. The bool guard pushes us to
+        # the conservative 30-min floor instead.
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        json_body={"api_key": "minted", "expires_in": True}
+                    )
+                ]
+            )
+            before = time.time()
+            lm = NousLM(
+                model="openai/test-model",
+                access_token="oauth",
+                refresh_token="r",
+                oauth_expires_at=time.time() + 86400,
+            )
+            # 30-minute floor, not 1 second.
+            assert lm._shared_state.agent_key_expires_at >= before + 1700
+
+
+class TestNetworkErrorWrapping:
+    def test_refresh_httpx_error_wrapped(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            client = MagicMock()
+            client.__enter__.return_value = client
+            client.post.side_effect = httpx.ConnectError("dns failure")
+            mock_cls.return_value = client
+            with pytest.raises(HermesProviderError, match="OAuth refresh"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="stale",
+                    refresh_token="r",
+                    oauth_expires_at=time.time() + 30,
+                )
+
+    def test_mint_httpx_error_wrapped(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            client = MagicMock()
+            client.__enter__.return_value = client
+            client.post.side_effect = httpx.ConnectError("dns failure")
+            mock_cls.return_value = client
+            with pytest.raises(HermesProviderError, match="agent-key mint"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="oauth",
+                    refresh_token="r",
+                    oauth_expires_at=time.time() + 86400,
+                )
+
+
+class TestStatusCodeRelogin:
+    def test_oauth_403_triggers_relogin_even_with_unknown_code(self):
+        # _format_oauth_error special-cases 401/403 status to force the
+        # relogin message even when the JSON error code isn't in the
+        # known-relogin set. Covers portal responses like a tenant-disabled
+        # 403 with error="access_denied".
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        status_code=403,
+                        json_body={"error": "access_denied"},
+                    )
+                ]
+            )
+            with pytest.raises(HermesProviderError, match="hermes model"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="stale",
+                    refresh_token="r",
+                    oauth_expires_at=time.time() + 30,
+                )
+
+    def test_mint_403_triggers_relogin(self):
+        # Mint 401 has the refresh-retry path; mint 403 doesn't (it
+        # signals tenant-side denial that won't recover from a fresh
+        # access_token). Should surface the relogin message directly.
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [
+                    _mock_response(
+                        status_code=403,
+                        json_body={"error": "tenant_suspended"},
+                    )
+                ]
+            )
+            with pytest.raises(HermesProviderError, match="hermes model"):
+                NousLM(
+                    model="openai/test-model",
+                    access_token="oauth",
+                    refresh_token="r",
+                    oauth_expires_at=time.time() + 86400,
+                )
+
+
+class TestAsyncForce401Recovery:
+    """The sync path has explicit retry + propagate tests; the async
+    path had no equivalent of either. These mirror them to keep the
+    two paths from drifting silently.
+    """
+
+    def _build_lm_with_initial_mint(self):
+        with patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "first-mint", "expires_in": 1800})]
+            )
+            return NousLM(
+                model="openai/test-model",
+                access_token="oauth-tok",
+                refresh_token="refresh-tok",
+                oauth_expires_at=time.time() + 86400,
+            )
+
+    def test_aforward_recovers_from_401_with_remint_and_retry(self):
+        lm = self._build_lm_with_initial_mint()
+        with patch("dspy.LM.aforward", new_callable=AsyncMock) as mock_super, \
+             patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            err = litellm.AuthenticationError(
+                message="401",
+                llm_provider="openai",
+                model="openai/test-model",
+            )
+            # AsyncMock with a list side_effect raises exceptions in
+            # sequence and returns non-exception items as the await value.
+            mock_super.side_effect = [err, "ok"]
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "post-401-mint", "expires_in": 1800})]
+            )
+            result = asyncio.run(
+                lm.aforward(messages=[{"role": "user", "content": "hi"}])
+            )
+            assert result == "ok"
+            assert mock_super.await_count == 2
+            assert lm.kwargs["api_key"] == "post-401-mint"
+
+    def test_aforward_propagates_second_401_as_hermes_provider_error(self):
+        lm = self._build_lm_with_initial_mint()
+        with patch("dspy.LM.aforward", new_callable=AsyncMock) as mock_super, \
+             patch("evolution.core.nous_lm.httpx.Client") as mock_cls:
+            err = litellm.AuthenticationError(
+                message="401", llm_provider="openai", model="openai/test-model"
+            )
+            mock_super.side_effect = [err, err]
+            mock_cls.return_value = _mock_httpx_post(
+                [_mock_response(json_body={"api_key": "remint", "expires_in": 1800})]
+            )
+            with pytest.raises(HermesProviderError, match="re-mint"):
+                asyncio.run(
+                    lm.aforward(messages=[{"role": "user", "content": "hi"}])
+                )
+
+
+class TestSharedStateInvariants:
+    def test_post_init_rejects_partial_agent_key_state(self):
+        # _SharedNousState __post_init__ catches the construction-time
+        # mistake of seeding agent_key without a paired expires_at —
+        # which would otherwise silently force perpetual re-mint.
+        from evolution.core.nous_lm import _SharedNousState
+
+        with pytest.raises(ValueError, match="set together"):
+            _SharedNousState(
+                access_token="x",
+                refresh_token="r",
+                oauth_expires_at=None,
+                agent_key="orphan-key",
+                agent_key_expires_at=None,
+                lock=threading.Lock(),
+            )
+        with pytest.raises(ValueError, match="set together"):
+            _SharedNousState(
+                access_token="x",
+                refresh_token="r",
+                oauth_expires_at=None,
+                agent_key=None,
+                agent_key_expires_at=time.time() + 1800,
+                lock=threading.Lock(),
+            )
+
+
 class TestParseIsoOrEpoch:
     def test_iso8601_with_offset(self):
         result = parse_iso_or_epoch("2026-05-15T10:30:00+00:00")
@@ -512,3 +808,37 @@ def test_empty_string_returns_none(self):
 
     def test_garbage_returns_none(self):
         assert parse_iso_or_epoch("not-a-timestamp") is None
+
+    def test_inf_returns_none(self):
+        # inf would silently make every skew check evaluate as
+        # "something >= inf" → False, so the token would be treated as
+        # eternally fresh. The validator must reject.
+        assert parse_iso_or_epoch(float("inf")) is None
+        assert parse_iso_or_epoch("inf") is None
+        assert parse_iso_or_epoch("Infinity") is None
+
+    def test_nan_returns_none(self):
+        # All comparisons against nan are False — same eternal-freshness
+        # trap as inf.
+        assert parse_iso_or_epoch(float("nan")) is None
+        assert parse_iso_or_epoch("nan") is None
+
+    def test_negative_returns_none(self):
+        # Structurally absurd for an expires_at; usually a parser bug
+        # upstream. Reject so the caller treats as "unknown".
+        assert parse_iso_or_epoch(-100) is None
+        assert parse_iso_or_epoch(-0.5) is None
+
+    def test_naive_iso_returns_none(self):
+        # datetime.fromisoformat("2026-05-15T10:30:00") returns a naive
+        # datetime; .timestamp() then interprets in the host's local TZ,
+        # silently corrupting the skew window by hours on non-UTC hosts.
+        # The validator rejects naive datetimes so the caller treats as
+        # "unknown" rather than producing a confidently-wrong epoch.
+        assert parse_iso_or_epoch("2026-05-15T10:30:00") is None
+
+    def test_bool_returns_none(self):
+        # bool subclasses int; without an explicit reject, True/False
+        # would silently coerce to 1.0 / 0.0 epoch — meaningless.
+        assert parse_iso_or_epoch(True) is None
+        assert parse_iso_or_epoch(False) is None
diff --git a/tests/core/test_nous_provider.py b/tests/core/test_nous_provider.py
index 1ee4ec6f..f480edc9 100644
--- a/tests/core/test_nous_provider.py
+++ b/tests/core/test_nous_provider.py
@@ -162,12 +162,14 @@ def test_custom_inference_base_url_flows_through(self, hermes_home):
 
 
 class TestNousResolutionFallbacks:
-    def test_pool_entry_without_refresh_token_falls_back_to_direct(
+    def test_pool_entry_with_agent_key_no_refresh_falls_back_to_direct(
         self, hermes_home
     ):
-        # Env-var-style: pool has only access_token, no refresh_token.
-        # The resolver must fall through to the existing OpenAI-wire
-        # direct-pass-through path so a NOUS_API_KEY user keeps working.
+        # Hand-edited or inference-only entry: has access_token + agent_key
+        # but no refresh_token. The resolver must fall through to the
+        # existing OpenAI-wire direct-pass-through path. The agent_key
+        # presence signals "this is an inference-ready credential, not a
+        # partial OAuth setup."
         _write_config(
             hermes_home,
             """
@@ -181,6 +183,7 @@ def test_pool_entry_without_refresh_token_falls_back_to_direct(
             access_token="bare-api-key",
             refresh_token=None,
             oauth_expires_at=None,
+            agent_key="inference-ready-bearer",
         )
         resolved = resolve_default_lm(role="optimizer", hermes_home=hermes_home)
         # Direct pass-through path: openai/<model>, api_base + api_key in lm_kwargs,
@@ -189,6 +192,29 @@ def test_pool_entry_without_refresh_token_falls_back_to_direct(
         assert resolved.model == "openai/Hermes-4-405B"
         assert resolved.lm_kwargs.get("api_key") == "bare-api-key"
 
+    def test_pool_entry_without_refresh_or_agent_key_raises(self, hermes_home):
+        # Partial OAuth setup: pool entry has access_token but no
+        # refresh_token AND no agent_key. Almost certainly an interrupted
+        # `hermes model` run. Raising here gives the operator a specific
+        # recovery hint instead of letting inference 401 with no breadcrumb.
+        _write_config(
+            hermes_home,
+            """
+            model:
+              default: Hermes-4-405B
+              provider: nous
+            """,
+        )
+        _write_nous_pool(
+            hermes_home,
+            access_token="oauth-only",
+            refresh_token=None,
+            oauth_expires_at=None,
+            agent_key=None,
+        )
+        with pytest.raises(HermesProviderError, match="partial OAuth setup"):
+            resolve_default_lm(role="optimizer", hermes_home=hermes_home)
+
     def test_missing_pool_entry_surfaces_recovery_hint(self, hermes_home):
         _write_config(
             hermes_home,