From 1ad08d668a6561ad3ad95acaafa02961de27254d Mon Sep 17 00:00:00 2001 From: Justin Ramos Date: Fri, 15 May 2026 08:51:00 -0600 Subject: [PATCH 1/5] chore: gitignore output/ run logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The output/ directory holds per-run logs, evolved artifacts, and gate decisions — all generated, none committable. Previous gitignore pattern output/**/*.md only excluded markdown, leaving .log and .json files showing as untracked in every git status, which has been quiet noise across the recent provider work. --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 974099aa..d5436f8b 100644 --- a/.gitignore +++ b/.gitignore @@ -15,8 +15,8 @@ datasets/**/*.jsonl datasets/**/*.json !datasets/.gitkeep -# Output files from run -output/**/*.md +# Output files from run (per-run logs, evolved artifacts, gate decisions) +output/ # Evolution snapshots snapshots/ From b2dfe239619b0fd4271b394d079733c2da4b3ba6 Mon Sep 17 00:00:00 2001 From: Justin Ramos Date: Fri, 15 May 2026 08:57:38 -0600 Subject: [PATCH 2/5] feat(nous_lm): NousLM with OAuth refresh + agent_key minting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A user with provider: nous in ~/.hermes/config.yaml currently can't actually use the framework — the resolver reads the OAuth access_token from the credential pool and hands that to LiteLLM as the inference Bearer, but Nous's inference endpoint requires the short-lived agent_key (a separate credential minted via POST /api/oauth/agent-key). This commit provides the LM subclass that handles the two-stage credential model. 
Mirrors hermes_cli/auth.py:3061-3193 (resolve_nous_runtime_credentials): * Refresh OAuth access_token in-memory when within 120s of expiry via POST {portal}/api/oauth/token (standard refresh_token grant with client_id="hermes-cli") * Mint a fresh agent_key when missing or within 120s of expiry via POST {portal}/api/oauth/agent-key (Bearer access_token, ask for 1800s min TTL) * Refresh-first-then-mint sequencing so a stale access_token doesn't cause mint failures * Mint 401 → refresh OAuth once and retry mint (Hermes pattern) * Inference 401 → force re-mint and retry once (mid-run recovery) Cross-instance state sharing via _SharedNousState keyed by initial refresh_token. The four LM roles (optimizer, reflection, eval, judge) share the lock + state, so a four-thread evolution doesn't trigger four parallel refresh+mint round-trips (the parallel refreshes would race the portal's single-use refresh-token rotation and produce refresh_token_reused errors on three of them). In-memory only — no auth.json writeback. Long evolutions (>30 min on a fresh agent_key) refresh + re-mint in-process; the on-disk store stays at whatever `hermes model` last wrote. This avoids the write-conflict surface with concurrent Hermes sessions that may also be refreshing. Error classification mirrors Hermes's own (auth.py:2595-2624): invalid_grant/invalid_token + HTTP 401/403 from the OAuth endpoint surface a HermesProviderError with a `hermes model` recovery hint; refresh_token_reused gets the special "another client consumed it" message; mint failures translate similarly. oauth_helpers.parse_iso_or_epoch handles both Nous's ISO 8601 expires_at and Codex's Unix epoch float — kept in a small standalone module so the next OAuth provider has somewhere obvious to extend without bloating either provider's LM file. 
--- evolution/core/nous_lm.py | 480 +++++++++++++++++++++++++++++ evolution/core/oauth_helpers.py | 49 +++ tests/core/test_nous_lm.py | 514 ++++++++++++++++++++++++++++++++ 3 files changed, 1043 insertions(+) create mode 100644 evolution/core/nous_lm.py create mode 100644 evolution/core/oauth_helpers.py create mode 100644 tests/core/test_nous_lm.py diff --git a/evolution/core/nous_lm.py b/evolution/core/nous_lm.py new file mode 100644 index 00000000..ec2df197 --- /dev/null +++ b/evolution/core/nous_lm.py @@ -0,0 +1,480 @@ +"""DSPy LM subclass for Nous Portal — OAuth refresh + agent_key minting. + +Nous Portal uses a two-stage credential model that's meaningfully different +from Codex: + + 1. **OAuth access_token** (long-lived, days). Refreshable via the standard + refresh_token grant at ``{portal}/api/oauth/token``. + 2. **agent_key** (short-lived, ~30 minutes). Minted from the access_token + by POSTing to ``{portal}/api/oauth/agent-key``. The inference endpoint + (``inference-api.nousresearch.com``) requires the **agent_key** as + Bearer — not the access_token. + +This module mirrors Hermes's own ``resolve_nous_runtime_credentials`` flow +at ``hermes_cli/auth.py:3061-3193``: refresh the OAuth token first if +expiring, then mint a fresh agent_key from it. On inference 401, force +re-mint and retry once. State is shared across LM instances via +``_STATE_BY_KEY`` so the four LM roles (optimizer, reflection, eval, +judge) coordinate through one lock and one mint per refresh window — +without this, four parallel workers entering the skew window would each +mint and three would race the portal. + +In-memory only — no auth.json writeback. Long evolutions (>30 min on a +fresh agent_key) refresh in-process, but the on-disk store stays at +whatever ``hermes model`` last wrote. Avoids the write-conflict surface +with concurrent Hermes sessions that may also be refreshing. 
+""" + +from __future__ import annotations + +import os +import threading +import time +from dataclasses import dataclass +from typing import Any, Dict, Optional + +import dspy +import httpx +import litellm + +from evolution.core.hermes_provider import HermesProviderError +from evolution.core.oauth_helpers import parse_iso_or_epoch + + +# Mirrors hermes_cli/auth.py:67-72 — reading the same constants keeps +# us in lockstep with Hermes's own behavior. Override via env vars when +# pointing at a stage portal or local mock for tests. +NOUS_PORTAL_BASE_URL = os.getenv( + "HERMES_PORTAL_BASE_URL", "https://portal.nousresearch.com" +) +NOUS_INFERENCE_BASE_URL = os.getenv( + "NOUS_INFERENCE_BASE_URL", "https://inference-api.nousresearch.com/v1" +) +NOUS_OAUTH_CLIENT_ID = "hermes-cli" + +# Refresh OAuth access tokens 2 minutes before they expire and re-mint +# the inference agent_key 2 minutes before it expires. Hermes uses the +# same constants at hermes_cli/auth.py:71-72; matching keeps multi-process +# workloads from racing each other onto the wire. +OAUTH_REFRESH_SKEW_SECONDS = 120 +AGENT_KEY_REFRESH_SKEW_SECONDS = 120 +# Ask the portal for at least 30 minutes of agent_key TTL on each mint; +# the portal is free to grant more. Mirrors DEFAULT_AGENT_KEY_MIN_TTL. +AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 + + +@dataclass +class _SharedNousState: + """OAuth + agent_key state shared across NousLM instances for the same + Nous account. + + Keyed in ``_STATE_BY_KEY`` by the initial refresh_token observed at + construction. All NousLMs created from the same resolver factory share + the same key, so a refresh or mint by any one of them is visible to + the others — without this, four parallel workers entering the skew + window simultaneously would each POST refresh+mint and three would + receive ``refresh_token_reused`` from the portal. 
+ """ + + access_token: str + refresh_token: str + oauth_expires_at: Optional[float] + agent_key: Optional[str] + agent_key_expires_at: Optional[float] + lock: threading.Lock + + def __deepcopy__(self, memo): + # NousLM uses dspy.LM.copy() (which deepcopies the whole instance) + # to apply role-specific kwargs. Locks aren't deep-copyable, and + # the *point* of shared state is to be shared. A copied NousLM + # must observe refreshes/mints performed against the original, so + # the copy keeps the same _SharedNousState reference. + return self + + +_STATE_BY_KEY: Dict[str, _SharedNousState] = {} +_STATE_REGISTRY_LOCK = threading.Lock() + + +def _get_or_register_state( + *, + key: str, + access_token: str, + refresh_token: str, + oauth_expires_at: Optional[float], + agent_key: Optional[str], + agent_key_expires_at: Optional[float], +) -> _SharedNousState: + """Register a new shared state on first observation; return the existing + one on subsequent calls. The first instance's OAuth values win — they're + the freshest at startup and any later instance with the same key was + constructed from the same source. + """ + with _STATE_REGISTRY_LOCK: + if key not in _STATE_BY_KEY: + _STATE_BY_KEY[key] = _SharedNousState( + access_token=access_token, + refresh_token=refresh_token, + oauth_expires_at=oauth_expires_at, + agent_key=agent_key, + agent_key_expires_at=agent_key_expires_at, + lock=threading.Lock(), + ) + return _STATE_BY_KEY[key] + + +def _reset_state_for_tests() -> None: + """Test-only: clear the module-level state cache so each test starts + from a clean slate. Tests that share state across cases would observe + refreshes/mints from prior tests bleeding through. 
+ """ + with _STATE_REGISTRY_LOCK: + _STATE_BY_KEY.clear() + + +class NousLM(dspy.LM): + """DSPy LM for Nous Portal — handles OAuth refresh + agent_key minting.""" + + def __init__( + self, + model: str, + *, + access_token: str, + refresh_token: str, + oauth_expires_at: Optional[float] = None, + agent_key: Optional[str] = None, + agent_key_expires_at: Optional[float] = None, + portal_base_url: Optional[str] = None, + inference_base_url: Optional[str] = None, + **kwargs: Any, + ) -> None: + kwargs["api_base"] = inference_base_url or NOUS_INFERENCE_BASE_URL + kwargs["api_key"] = agent_key or "" + + super().__init__(model=model, **kwargs) + + self._portal_base_url = portal_base_url or NOUS_PORTAL_BASE_URL + + # The lookup key for shared state — falls back to id(self) so test + # scenarios with synthetic creds get per-instance isolation rather + # than colliding on the empty-string key. + self._state_key = refresh_token or f"no-refresh:{id(self)}" + self._shared_state = _get_or_register_state( + key=self._state_key, + access_token=access_token, + refresh_token=refresh_token, + oauth_expires_at=oauth_expires_at, + agent_key=agent_key, + agent_key_expires_at=agent_key_expires_at, + ) + + # Initial mint if the constructor-supplied agent_key is missing or + # already expiring. Cheap on the happy path; one POST otherwise. + self._ensure_credentials() + + # ------------------------------------------------------------------ + # Refresh + mint orchestration + # ------------------------------------------------------------------ + + def _oauth_needs_refresh(self) -> bool: + if self._shared_state.oauth_expires_at is None: + # Unknown expiry → don't speculatively refresh; let the mint + # call surface a 401 if the access_token is actually dead. 
+ return False + return ( + time.time() + OAUTH_REFRESH_SKEW_SECONDS + >= self._shared_state.oauth_expires_at + ) + + def _agent_key_needs_mint(self) -> bool: + if not self._shared_state.agent_key: + return True + if self._shared_state.agent_key_expires_at is None: + # Have a key but no expiry — treat as needing re-mint to be + # safe. Cheaper than letting it 401 mid-run. + return True + return ( + time.time() + AGENT_KEY_REFRESH_SKEW_SECONDS + >= self._shared_state.agent_key_expires_at + ) + + def _sync_from_shared_state(self) -> None: + """Pull the latest agent_key out of shared state into self.kwargs.""" + self.kwargs["api_key"] = self._shared_state.agent_key or "" + + def _ensure_credentials(self) -> None: + """Acquire the per-account lock; refresh OAuth and/or mint as needed. + + Double-checked locking: when N threads enter the skew window + simultaneously, only the first one performs the HTTP round-trip; + the others observe the updated state after acquiring the lock and + return without touching the network. + """ + if not self._oauth_needs_refresh() and not self._agent_key_needs_mint(): + self._sync_from_shared_state() + return + + with self._shared_state.lock: + if self._oauth_needs_refresh(): + self._refresh_oauth() + if self._agent_key_needs_mint(): + self._mint_agent_key(allow_oauth_retry=True) + self._sync_from_shared_state() + + def _force_remint(self) -> None: + """Skip skew check and re-mint immediately. Called when an inference + call returned 401 — the cached agent_key is bad and we don't want + to wait for the skew window. 
+ """ + with self._shared_state.lock: + self._mint_agent_key(allow_oauth_retry=True) + self._sync_from_shared_state() + + # ------------------------------------------------------------------ + # OAuth refresh + # ------------------------------------------------------------------ + + def _refresh_oauth(self) -> None: + """POST refresh_token grant; on success, mutate shared state.""" + if not self._shared_state.refresh_token: + raise HermesProviderError( + "Nous Portal access token is expiring but no refresh_token " + "is available. Run `hermes model` and select Nous Portal " + "to re-authenticate." + ) + + try: + with httpx.Client(timeout=httpx.Timeout(20.0)) as client: + response = client.post( + f"{self._portal_base_url}/api/oauth/token", + headers={ + "Accept": "application/json", + "Content-Type": "application/x-www-form-urlencoded", + }, + data={ + "grant_type": "refresh_token", + "client_id": NOUS_OAUTH_CLIENT_ID, + "refresh_token": self._shared_state.refresh_token, + }, + ) + except httpx.HTTPError as exc: + raise HermesProviderError( + f"Nous Portal OAuth refresh failed ({exc}). Check network " + f"connectivity, then re-try; if the failure persists, run " + f"`hermes model` to re-authenticate." + ) from exc + + if response.status_code != 200: + raise HermesProviderError(_format_oauth_error(response)) + + try: + payload = response.json() + except ValueError as exc: + raise HermesProviderError( + "Nous Portal OAuth refresh returned invalid JSON. " + "Run `hermes model` to re-authenticate." + ) from exc + + new_access = payload.get("access_token") + if not isinstance(new_access, str) or not new_access.strip(): + raise HermesProviderError( + "Nous Portal OAuth refresh response was missing access_token. " + "Run `hermes model` to re-authenticate." + ) + + # Refresh tokens may rotate (single-use semantics). Honor the new + # one if present; missing means the portal kept the original valid. 
+ new_refresh = payload.get("refresh_token") + if isinstance(new_refresh, str) and new_refresh.strip(): + self._shared_state.refresh_token = new_refresh.strip() + + expires_in = payload.get("expires_in") + if isinstance(expires_in, (int, float)) and expires_in > 0: + self._shared_state.oauth_expires_at = time.time() + float(expires_in) + else: + # Conservative 1h fallback if the field is missing — keeps the + # next call from racing to the wire again immediately. + self._shared_state.oauth_expires_at = time.time() + 3600.0 + + self._shared_state.access_token = new_access.strip() + + # ------------------------------------------------------------------ + # Agent_key minting + # ------------------------------------------------------------------ + + def _mint_agent_key(self, *, allow_oauth_retry: bool) -> None: + """POST agent-key mint; on 401, optionally refresh OAuth and retry. + + Mirrors Hermes's mint-401-triggers-refresh-retry pattern at + ``hermes_cli/auth.py:3122-3174``. ``allow_oauth_retry`` is True on + the first call from ``_ensure_credentials``; the recursive retry + passes False to bound the recursion at one OAuth refresh. + """ + try: + with httpx.Client(timeout=httpx.Timeout(20.0)) as client: + response = client.post( + f"{self._portal_base_url}/api/oauth/agent-key", + headers={ + "Accept": "application/json", + "Content-Type": "application/json", + "Authorization": f"Bearer {self._shared_state.access_token}", + }, + json={"min_ttl_seconds": AGENT_KEY_MIN_TTL_SECONDS}, + ) + except httpx.HTTPError as exc: + raise HermesProviderError( + f"Nous Portal agent-key mint failed ({exc}). Check network " + f"connectivity, then re-try; if the failure persists, run " + f"`hermes model` to re-authenticate." + ) from exc + + if response.status_code == 200: + self._absorb_mint_response(response) + return + + # 401 from mint → access_token may be stale even though OAuth said + # it's still valid. Refresh once and retry. After that, give up. 
+ if response.status_code == 401 and allow_oauth_retry: + self._refresh_oauth() + self._mint_agent_key(allow_oauth_retry=False) + return + + raise HermesProviderError(_format_mint_error(response)) + + def _absorb_mint_response(self, response: httpx.Response) -> None: + try: + payload = response.json() + except ValueError as exc: + raise HermesProviderError( + "Nous Portal agent-key mint returned invalid JSON. " + "Run `hermes model` to re-authenticate." + ) from exc + + # Hermes uses both ``api_key`` (current portal field) and falls back + # to ``agent_key`` (older shape). Mirror both so a portal protocol + # rev doesn't break us. + agent_key = payload.get("api_key") or payload.get("agent_key") + if not isinstance(agent_key, str) or not agent_key.strip(): + raise HermesProviderError( + "Nous Portal agent-key mint response was missing api_key. " + "Run `hermes model` to re-authenticate." + ) + + # ``expires_at`` is ISO 8601; ``expires_in`` is seconds-from-now. + # Prefer expires_at when both present (server-authoritative). + new_expires_at = parse_iso_or_epoch(payload.get("expires_at")) + if new_expires_at is None: + expires_in = payload.get("expires_in") + if isinstance(expires_in, (int, float)) and expires_in > 0: + new_expires_at = time.time() + float(expires_in) + else: + # Conservative — assume the floor TTL we asked for. + new_expires_at = time.time() + AGENT_KEY_MIN_TTL_SECONDS + + self._shared_state.agent_key = agent_key.strip() + self._shared_state.agent_key_expires_at = new_expires_at + + # ------------------------------------------------------------------ + # forward / aforward — ensure creds, then delegate. Catch 401 once. 
+ # ------------------------------------------------------------------ + + def forward(self, prompt=None, messages=None, **kwargs): # type: ignore[override] + self._ensure_credentials() + try: + return super().forward(prompt=prompt, messages=messages, **kwargs) + except litellm.AuthenticationError: + # Cached agent_key is dead despite passing the skew check. + # Force re-mint (which may also refresh OAuth on its own 401) + # and retry once. A second 401 propagates so the auth-abort + # sentinel + cost-ceiling path catches it. + self._force_remint() + return super().forward(prompt=prompt, messages=messages, **kwargs) + + async def aforward(self, prompt=None, messages=None, **kwargs): # type: ignore[override] + self._ensure_credentials() + try: + return await super().aforward(prompt=prompt, messages=messages, **kwargs) + except litellm.AuthenticationError: + self._force_remint() + return await super().aforward(prompt=prompt, messages=messages, **kwargs) + + +# ---------------------------------------------------------------------- +# Error classification — mirror hermes-agent error taxonomy +# ---------------------------------------------------------------------- + +# OAuth error codes from the Nous portal's /api/oauth/token endpoint that +# indicate a permanently invalid refresh token. User must re-authenticate. +_OAUTH_RELOGIN_ERROR_CODES = frozenset({"invalid_grant", "invalid_token"}) + + +def _format_oauth_error(response: httpx.Response) -> str: + """Translate a non-200 OAuth refresh response into an actionable user + message. Mirrors hermes_cli/auth.py:2595-2624. + """ + code, detail = _parse_error_body(response) + + if code == "refresh_token_reused" or "reuse" in detail.lower(): + return ( + "Nous Portal refresh token was already consumed by another " + "client (the portal enforces single-use refresh-token rotation). " + "Run `hermes model` and select Nous Portal to re-authenticate." 
+ ) + + if code in _OAUTH_RELOGIN_ERROR_CODES or response.status_code in (401, 403): + return ( + f"Nous Portal OAuth refresh failed ({code}: {detail}). " + f"Run `hermes model` and select Nous Portal to re-authenticate." + ) + + return ( + f"Nous Portal OAuth refresh failed ({code}: {detail}). " + f"Re-try; if the failure persists, run `hermes model`." + ) + + +def _format_mint_error(response: httpx.Response) -> str: + """Translate a non-200 agent-key mint response. 401 from mint is + handled in ``_mint_agent_key`` (refresh-retry); this formatter sees + only the unrecoverable cases. + """ + code, detail = _parse_error_body(response) + if response.status_code in (401, 403): + return ( + f"Nous Portal agent-key mint failed ({code}: {detail}). " + f"Run `hermes model` and select Nous Portal to re-authenticate." + ) + return ( + f"Nous Portal agent-key mint failed (HTTP {response.status_code}, " + f"{code}: {detail}). Re-try; if the failure persists, run " + f"`hermes model`." + ) + + +def _parse_error_body(response: httpx.Response) -> tuple[str, str]: + """Best-effort parse of OAuth-style error JSON. Returns (code, detail) + with sensible defaults when the body is missing or malformed. 
+ """ + code = "unknown" + detail = f"status {response.status_code}" + try: + body = response.json() + if isinstance(body, dict): + err = body.get("error") + if isinstance(err, dict): + # OpenAI shape: {"error": {"code": ..., "message": ...}} + nested_code = err.get("code") or err.get("type") + if isinstance(nested_code, str) and nested_code.strip(): + code = nested_code.strip() + nested_msg = err.get("message") + if isinstance(nested_msg, str) and nested_msg.strip(): + detail = nested_msg.strip() + elif isinstance(err, str) and err.strip(): + # OAuth-spec shape: {"error": "code", "error_description": "..."} + code = err.strip() + desc = body.get("error_description") or body.get("message") + if isinstance(desc, str) and desc.strip(): + detail = desc.strip() + except ValueError: + pass + return code, detail diff --git a/evolution/core/oauth_helpers.py b/evolution/core/oauth_helpers.py new file mode 100644 index 00000000..7efbe5e6 --- /dev/null +++ b/evolution/core/oauth_helpers.py @@ -0,0 +1,49 @@ +"""Shared OAuth helpers used by Codex and Nous LM wrappers. + +Kept as a small standalone module so the next OAuth provider that needs +in-memory refresh has somewhere obvious to drop shared utilities without +bloating either provider's LM file. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any, Optional + + +def parse_iso_or_epoch(value: Any) -> Optional[float]: + """Coerce an expires_at value into Unix epoch seconds. + + Different OAuth providers serialize token expiry in different shapes: + + * Nous Portal stores ISO 8601 strings ("2026-05-15T10:30:00+00:00") + in ``~/.hermes/auth.json``. + * Codex stores Unix epoch floats (or decodes from a JWT ``exp`` claim). + * Older or hand-edited entries may omit it entirely. + + Returns the equivalent Unix epoch float, or None when the value is + missing, malformed, or has no parseable shape. Callers treat None as + "unknown" — typically meaning "trigger a refresh" defensively. 
+ """ + if value is None: + return None + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + s = value.strip() + if not s: + return None + # ISO 8601 — Python's fromisoformat handles "+00:00" but not the + # bare "Z" suffix common in OpenAI-shaped responses. + if s.endswith("Z"): + s = s[:-1] + "+00:00" + try: + return datetime.fromisoformat(s).timestamp() + except ValueError: + pass + # Numeric-looking string ("1747299600") — treat as epoch seconds. + try: + return float(s) + except ValueError: + return None + return None diff --git a/tests/core/test_nous_lm.py b/tests/core/test_nous_lm.py new file mode 100644 index 00000000..156b8b35 --- /dev/null +++ b/tests/core/test_nous_lm.py @@ -0,0 +1,514 @@ +"""Tests for the Nous Portal LM subclass. + +Covers: + * Construction wiring (inference_base_url, agent_key) + * Initial mint when agent_key missing or expiring + * OAuth refresh when access_token expiring + * Two-stage refresh-then-mint when both expiring + * Mint 401 → refresh + retry mint (Hermes pattern) + * Inference 401 → force re-mint and retry once + * Cross-instance state sharing (4 workers, 1 mint) + * Async path (aforward) + * Error classification: invalid_grant, refresh_token_reused +""" + +from __future__ import annotations + +import asyncio +import threading +import time +from concurrent.futures import ThreadPoolExecutor +from unittest.mock import MagicMock, patch + +import httpx +import litellm +import pytest + +from evolution.core.hermes_provider import HermesProviderError +from evolution.core.nous_lm import ( + AGENT_KEY_MIN_TTL_SECONDS, + NOUS_OAUTH_CLIENT_ID, + NousLM, + _format_mint_error, + _format_oauth_error, + _reset_state_for_tests, +) +from evolution.core.oauth_helpers import parse_iso_or_epoch + + +@pytest.fixture(autouse=True) +def _clean_nous_state(): + _reset_state_for_tests() + yield + _reset_state_for_tests() + + +def _mock_response(*, status_code: int = 200, json_body: dict | None = None) -> 
MagicMock: + mock = MagicMock(spec=httpx.Response) + mock.status_code = status_code + if json_body is not None: + mock.json = MagicMock(return_value=json_body) + else: + mock.json = MagicMock(side_effect=ValueError("no body")) + return mock + + +def _mock_httpx_post(responses: list): + """Build an httpx.Client mock that returns responses in order across + calls to .post(). Lets us script multi-step flows (refresh-then-mint, + mint-401-refresh-retry). + """ + client = MagicMock() + client.__enter__.return_value = client + client.post.side_effect = responses + return client + + +# --------------------------------------------------------------------------- +# Construction +# --------------------------------------------------------------------------- + + +class TestNousLMConstruction: + def test_wires_inference_base_url_and_initial_agent_key(self): + # Pre-supplying a fresh agent_key should NOT trigger initial mint. + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + lm = NousLM( + model="openai/test-model", + access_token="oauth-tok", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, # not expiring + agent_key="initial-agent-key", + agent_key_expires_at=time.time() + 1800, # not expiring + inference_base_url="https://test-inference/v1", + ) + assert lm.kwargs["api_base"] == "https://test-inference/v1" + assert lm.kwargs["api_key"] == "initial-agent-key" + mock_cls.assert_not_called() + + def test_falls_back_to_default_inference_base(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "minted", "expires_in": 1800})] + ) + lm = NousLM( + model="openai/test-model", + access_token="oauth-tok", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, + ) + assert "inference-api.nousresearch.com" in lm.kwargs["api_base"] + + +# --------------------------------------------------------------------------- +# Initial mint 
behavior +# --------------------------------------------------------------------------- + + +class TestInitialMint: + def test_mints_when_agent_key_missing(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "fresh-mint", "expires_in": 1800})] + ) + lm = NousLM( + model="openai/test-model", + access_token="oauth-tok", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, + agent_key=None, + ) + assert lm.kwargs["api_key"] == "fresh-mint" + # Verify the mint POST shape + client = mock_cls.return_value + assert client.post.call_count == 1 + call = client.post.call_args + assert "/api/oauth/agent-key" in call.args[0] + assert call.kwargs["headers"]["Authorization"] == "Bearer oauth-tok" + assert call.kwargs["json"]["min_ttl_seconds"] == AGENT_KEY_MIN_TTL_SECONDS + + def test_mints_when_agent_key_within_skew_window(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "fresh-mint", "expires_in": 1800})] + ) + lm = NousLM( + model="openai/test-model", + access_token="oauth-tok", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, + agent_key="stale-key", + agent_key_expires_at=time.time() + 60, # inside 120s skew + ) + assert lm.kwargs["api_key"] == "fresh-mint" + + def test_skips_mint_when_agent_key_fresh(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + lm = NousLM( + model="openai/test-model", + access_token="oauth-tok", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, + agent_key="fresh-key", + agent_key_expires_at=time.time() + 1800, # well outside skew + ) + assert lm.kwargs["api_key"] == "fresh-key" + mock_cls.assert_not_called() + + +# --------------------------------------------------------------------------- +# Two-stage refresh + mint +# 
--------------------------------------------------------------------------- + + +class TestTwoStageRefreshMint: + def test_oauth_expiring_refreshes_then_mints(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + json_body={ + "access_token": "refreshed-oauth", + "expires_in": 86400, + } + ), + _mock_response( + json_body={"api_key": "post-refresh-mint", "expires_in": 1800} + ), + ] + ) + lm = NousLM( + model="openai/test-model", + access_token="stale-oauth", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 30, # within 120s skew + agent_key=None, # also needs mint + ) + client = mock_cls.return_value + assert client.post.call_count == 2 + # First call: OAuth refresh + first = client.post.call_args_list[0] + assert "/api/oauth/token" in first.args[0] + assert first.kwargs["data"]["grant_type"] == "refresh_token" + assert first.kwargs["data"]["client_id"] == NOUS_OAUTH_CLIENT_ID + # Second call: mint with the REFRESHED access_token + second = client.post.call_args_list[1] + assert "/api/oauth/agent-key" in second.args[0] + assert second.kwargs["headers"]["Authorization"] == "Bearer refreshed-oauth" + assert lm.kwargs["api_key"] == "post-refresh-mint" + + def test_oauth_response_rotated_refresh_token_persisted(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + json_body={ + "access_token": "new-oauth", + "refresh_token": "rotated-refresh", + "expires_in": 86400, + } + ), + _mock_response( + json_body={"api_key": "minted", "expires_in": 1800} + ), + ] + ) + lm = NousLM( + model="openai/test-model", + access_token="stale-oauth", + refresh_token="original-refresh", + oauth_expires_at=time.time() + 30, + ) + assert lm._shared_state.refresh_token == "rotated-refresh" + + +# --------------------------------------------------------------------------- +# Mint 401 → refresh OAuth + retry 
mint (Hermes pattern) +# --------------------------------------------------------------------------- + + +class TestMint401TriggersRefreshRetry: + def test_mint_401_refreshes_and_retries(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + # First mint attempt: 401 + _mock_response(status_code=401, json_body={"error": "invalid_token"}), + # OAuth refresh succeeds + _mock_response( + json_body={"access_token": "refreshed", "expires_in": 86400} + ), + # Second mint attempt with refreshed access_token: success + _mock_response(json_body={"api_key": "post-retry-mint", "expires_in": 1800}), + ] + ) + lm = NousLM( + model="openai/test-model", + access_token="stale", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, # OAuth says "still valid" + ) + client = mock_cls.return_value + assert client.post.call_count == 3 + assert lm.kwargs["api_key"] == "post-retry-mint" + + def test_mint_401_retry_also_fails_propagates(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response(status_code=401, json_body={"error": "invalid_token"}), + _mock_response( + json_body={"access_token": "refreshed", "expires_in": 86400} + ), + # Retry mint also 401 — give up. 
+ _mock_response(status_code=401, json_body={"error": "invalid_token"}), + ] + ) + with pytest.raises(HermesProviderError, match="hermes model"): + NousLM( + model="openai/test-model", + access_token="stale", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, + ) + + +# --------------------------------------------------------------------------- +# Inference 401 → force re-mint + retry once +# --------------------------------------------------------------------------- + + +class TestInferenceForceRemint: + def _build_lm_with_initial_mint(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "first-mint", "expires_in": 1800})] + ) + return NousLM( + model="openai/test-model", + access_token="oauth-tok", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, + ) + + def test_forward_recovers_from_401_with_remint_and_retry(self): + lm = self._build_lm_with_initial_mint() + # Now inference 401s once, then succeeds after re-mint. + with patch("dspy.LM.forward", autospec=True) as mock_super, \ + patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_super.side_effect = [ + litellm.AuthenticationError( + message="401 Unauthorized", + llm_provider="openai", + model="openai/test-model", + ), + "ok", + ] + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "post-401-mint", "expires_in": 1800})] + ) + result = lm.forward(messages=[{"role": "user", "content": "hi"}]) + assert result == "ok" + assert mock_super.call_count == 2 + # The cached agent_key was refreshed before the retry. 
+ assert lm.kwargs["api_key"] == "post-401-mint" + + def test_forward_propagates_second_401(self): + lm = self._build_lm_with_initial_mint() + with patch("dspy.LM.forward", autospec=True) as mock_super, \ + patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + err = litellm.AuthenticationError( + message="401", + llm_provider="openai", + model="openai/test-model", + ) + mock_super.side_effect = [err, err] + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "remint", "expires_in": 1800})] + ) + with pytest.raises(litellm.AuthenticationError): + lm.forward(messages=[{"role": "user", "content": "hi"}]) + + +# --------------------------------------------------------------------------- +# Cross-instance state sharing — concurrent mint race +# --------------------------------------------------------------------------- + + +class TestCrossInstanceSharing: + def test_concurrent_mint_only_posts_once(self): + """Four NousLM instances sharing the same refresh_token must + coordinate so only ONE actually POSTs to /api/oauth/agent-key. + Without shared state, three would race the portal. 
+ """ + post_count = {"n": 0} + post_lock = threading.Lock() + + def slow_post(*args, **kwargs): + with post_lock: + post_count["n"] += 1 + time.sleep(0.05) + return _mock_response( + json_body={"api_key": "concurrent-mint", "expires_in": 1800} + ) + + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + client = MagicMock() + client.__enter__.return_value = client + client.post.side_effect = slow_post + mock_cls.return_value = client + + shared_args = dict( + model="openai/test-model", + access_token="oauth-tok", + refresh_token="shared-refresh", + oauth_expires_at=time.time() + 86400, + agent_key="stale-key", + agent_key_expires_at=time.time() + 60, + ) + instances = [NousLM(**shared_args) for _ in range(4)] + + with ThreadPoolExecutor(max_workers=4) as pool: + futs = [pool.submit(lm._ensure_credentials) for lm in instances] + for f in futs: + f.result(timeout=10) + + # Initial construction triggers one mint (skew check positive + # because agent_key_expires_at < now+120). _ensure_credentials + # called again on each of 4 threads should observe shared state + # and NOT mint again. So total POSTs = 1 (the constructor mint). 
+ assert post_count["n"] == 1 + for lm in instances: + assert lm.kwargs["api_key"] == "concurrent-mint" + + +# --------------------------------------------------------------------------- +# Async path +# --------------------------------------------------------------------------- + + +class TestAsyncPath: + def test_aforward_invokes_ensure_credentials(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "minted", "expires_in": 1800})] + ) + lm = NousLM( + model="openai/test-model", + access_token="oauth", + refresh_token="refresh", + oauth_expires_at=time.time() + 86400, + ) + + with patch.object(NousLM, "_ensure_credentials") as mock_ensure, \ + patch("dspy.LM.aforward", autospec=True) as mock_super_aforward: + mock_super_aforward.return_value = asyncio.sleep(0, result="ok") + + asyncio.run( + lm.aforward(messages=[{"role": "user", "content": "hi"}]) + ) + + mock_ensure.assert_called_once() + mock_super_aforward.assert_called_once() + + +# --------------------------------------------------------------------------- +# Error classification +# --------------------------------------------------------------------------- + + +class TestErrorClassification: + def test_invalid_grant_surfaces_relogin_message(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + # OAuth refresh fails with invalid_grant. 
+ _mock_response( + status_code=400, + json_body={"error": "invalid_grant", "error_description": "bad"}, + ), + ] + ) + with pytest.raises(HermesProviderError, match="hermes model"): + NousLM( + model="openai/test-model", + access_token="stale", + refresh_token="bad-refresh", + oauth_expires_at=time.time() + 30, # forces refresh path + ) + + def test_refresh_token_reused_special_message(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + status_code=400, + json_body={ + "error": { + "code": "refresh_token_reused", + "message": "Already consumed", + } + }, + ) + ] + ) + with pytest.raises(HermesProviderError) as excinfo: + NousLM( + model="openai/test-model", + access_token="stale", + refresh_token="reused", + oauth_expires_at=time.time() + 30, + ) + msg = str(excinfo.value) + assert "another client" in msg + assert "hermes model" in msg + + def test_format_oauth_error_handles_no_body(self): + resp = _mock_response(status_code=500, json_body=None) + msg = _format_oauth_error(resp) + assert "status 500" in msg + + def test_format_mint_error_extracts_openai_shape(self): + resp = _mock_response( + status_code=403, + json_body={"error": {"code": "rate_limited", "message": "slow down"}}, + ) + msg = _format_mint_error(resp) + assert "rate_limited" in msg + assert "slow down" in msg + assert "hermes model" in msg + + +# --------------------------------------------------------------------------- +# parse_iso_or_epoch +# --------------------------------------------------------------------------- + + +class TestParseIsoOrEpoch: + def test_iso8601_with_offset(self): + result = parse_iso_or_epoch("2026-05-15T10:30:00+00:00") + assert result == 1778841000.0 + + def test_iso8601_with_z_suffix(self): + # OpenAI-shaped: trailing Z is shorthand for +00:00 + result = parse_iso_or_epoch("2026-05-15T10:30:00Z") + assert result == 1778841000.0 + + def test_unix_epoch_float(self): + assert 
parse_iso_or_epoch(1779179400.0) == 1779179400.0 + + def test_unix_epoch_int(self): + assert parse_iso_or_epoch(1779179400) == 1779179400.0 + + def test_numeric_string(self): + assert parse_iso_or_epoch("1779179400") == 1779179400.0 + + def test_none_returns_none(self): + assert parse_iso_or_epoch(None) is None + + def test_empty_string_returns_none(self): + assert parse_iso_or_epoch("") is None + assert parse_iso_or_epoch(" ") is None + + def test_garbage_returns_none(self): + assert parse_iso_or_epoch("not-a-timestamp") is None From df2b130114fa80911f14762c19c25388ab428534 Mon Sep 17 00:00:00 2001 From: Justin Ramos Date: Fri, 15 May 2026 09:01:27 -0600 Subject: [PATCH 3/5] feat(hermes_provider): route provider: nous OAuth flow through NousLM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new branch in resolve_default_lm: when canonical == "nous" AND the auth.json pool entry has a refresh_token (signals OAuth-managed flow that hermes model writes), build a ResolvedLM whose lm_factory constructs a NousLM that handles the two-stage refresh + agent_key mint internally. Pool entries WITHOUT refresh_token (env-var-style NOUS_API_KEY users) fall through to the existing OpenAI-wire direct-pass-through path unchanged. Note that this direct-pass-through path probably also doesn't work for Nous (the pool's access_token field holds the OAuth token, not the agent_key the inference endpoint needs) — but that's a pre-existing condition orthogonal to this PR. We don't try to "upgrade" those users silently. Missing pool entry → HermesProviderError pointing at `hermes model` recovery rather than silent fall-through to the broken direct path. instantiate_lm + the existing _probe_via_factory in auth_check (added in PR #58) both already dispatch on lm_factory presence — Nous flows through them unchanged. The Nous recovery hint is already in _HERMES_AUTH_COMMAND_BY_PROVIDER from the original auth-check work. 
--- evolution/core/hermes_provider.py | 111 +++++++++++++ tests/core/test_nous_provider.py | 267 ++++++++++++++++++++++++++++++ 2 files changed, 378 insertions(+) create mode 100644 tests/core/test_nous_provider.py diff --git a/evolution/core/hermes_provider.py b/evolution/core/hermes_provider.py index f66d0652..8c7c66f9 100644 --- a/evolution/core/hermes_provider.py +++ b/evolution/core/hermes_provider.py @@ -312,6 +312,18 @@ def resolve_default_lm( auth_store=auth_store, target_model=target_model, role=role ) + # Nous Portal: when the credential pool entry has a refresh_token (the + # OAuth-managed flow that hermes model writes), route through NousLM + # for in-memory OAuth refresh + agent_key minting. The plain env-var + # NOUS_API_KEY path falls through to the generic OpenAI-wire handler + # below — no behavior change for that simpler setup. + if canonical == "nous": + nous_resolved = _maybe_resolve_nous_lm( + auth_store=auth_store, target_model=target_model, role=role + ) + if nous_resolved is not None: + return nous_resolved + if not target_model: raise HermesProviderError( f"~/.hermes/config.yaml sets provider='{requested_provider}' " @@ -703,6 +715,105 @@ def _factory() -> Any: ) +def _maybe_resolve_nous_lm( + *, + auth_store: Dict[str, Any], + target_model: str, + role: Role, +) -> Optional[ResolvedLM]: + """Build a NousLM-backed ResolvedLM when the auth.json pool entry + looks OAuth-managed; return None to let the caller fall through to + the generic OpenAI-wire handler when the entry is just an env-var- + style API key. + + Nous uses a two-stage credential model: an OAuth access_token + (long-lived) is exchanged for a short-lived agent_key that's the + actual inference Bearer. NousLM handles both: refresh access_token + in-memory when expiring, mint a fresh agent_key from it, re-mint on + inference 401. See evolution/core/nous_lm.py. + + The "looks OAuth-managed" signal: pool entry has a refresh_token. 
A + pool entry without refresh_token is either env-var-only (NOUS_API_KEY + set, no real OAuth state) or hand-edited; let the caller fall + through to direct pass-through so we don't break that setup. + + The CodexLM-equivalent NousLM import is lazy to avoid a circular + dependency: nous_lm imports HermesProviderError from this module. + """ + pool_entry = _pick_pool_entry(auth_store, "nous") + if pool_entry is None: + # No pool entry at all → hint operator at the right recovery + # rather than falling through silently to env-var resolution + # that probably also won't work. + raise HermesProviderError( + "~/.hermes/config.yaml sets provider='nous' but no usable " + "entry was found in ~/.hermes/auth.json credential_pool[\"nous\"]. " + "Run `hermes model` and select Nous Portal to authenticate, " + f"or pass --{role}-model to bypass Hermes resolution." + ) + + refresh_token = _str_or_none(pool_entry.get("refresh_token")) + if not refresh_token: + # OAuth flow not in play; let the caller use the existing + # env-var/access_token-as-Bearer path. Note this path will still + # 401 against Nous's inference endpoint when the field actually + # holds the OAuth access_token (Nous needs the minted agent_key), + # but we can't tell from here whether the user intends OAuth or + # env-var, so the conservative posture is "don't change behavior". + return None + + access_token = _str_or_none(pool_entry.get("access_token")) + if not access_token: + raise HermesProviderError( + "~/.hermes/auth.json credential_pool[\"nous\"] entry has no " + "access_token. Run `hermes model` and select Nous Portal to " + "re-authenticate." + ) + + if not target_model: + raise HermesProviderError( + "~/.hermes/config.yaml sets provider='nous' but model.default " + f"is empty. Set it (e.g., 'Hermes-4-405B'), or pass --{role}-model." + ) + + # Lazy import to break the circular dependency with nous_lm. 
+ from evolution.core.nous_lm import ( # noqa: PLC0415 + NousLM as _NousLM, + NOUS_INFERENCE_BASE_URL, + NOUS_PORTAL_BASE_URL, + ) + from evolution.core.oauth_helpers import parse_iso_or_epoch # noqa: PLC0415 + + inference_base_url = ( + _str_or_none(pool_entry.get("inference_base_url")) + or _str_or_none(pool_entry.get("base_url")) + or NOUS_INFERENCE_BASE_URL + ) + oauth_expires_at = parse_iso_or_epoch(pool_entry.get("expires_at")) + agent_key = _str_or_none(pool_entry.get("agent_key")) + agent_key_expires_at = parse_iso_or_epoch(pool_entry.get("agent_key_expires_at")) + + def _factory() -> Any: + return _NousLM( + model=f"openai/{target_model}", + access_token=access_token, + refresh_token=refresh_token, + oauth_expires_at=oauth_expires_at, + agent_key=agent_key, + agent_key_expires_at=agent_key_expires_at, + portal_base_url=NOUS_PORTAL_BASE_URL, + inference_base_url=inference_base_url, + ) + + return ResolvedLM( + model=f"openai/{target_model}", + lm_kwargs={}, + source=f"hermes-config:nous(inference_base_url={inference_base_url})", + lm_factory=_factory, + provider_hint="nous", + ) + + def _build_resolved_lm( *, provider: str, diff --git a/tests/core/test_nous_provider.py b/tests/core/test_nous_provider.py new file mode 100644 index 00000000..1ee4ec6f --- /dev/null +++ b/tests/core/test_nous_provider.py @@ -0,0 +1,267 @@ +"""Tests for Nous Portal resolution in the Hermes-aware LM resolver. + +The resolver routes ``provider: nous`` through ``_maybe_resolve_nous_lm`` +when the auth.json pool entry has a refresh_token (signals OAuth-managed +flow). Pool entries without refresh_token (env-var-style) fall back to +the existing direct-pass-through path so we don't break that simpler +setup. Without a pool entry at all, the resolver fails with an +actionable `hermes model` recovery hint rather than silently routing to +something that won't work. 
+""" + +from __future__ import annotations + +import json +import textwrap +import time +from pathlib import Path + +import pytest + +from evolution.core.hermes_provider import ( + HermesProviderError, + ResolvedLM, + resolve_default_lm, +) +from evolution.core.nous_lm import NousLM, _reset_state_for_tests + + +@pytest.fixture(autouse=True) +def _clean_nous_state(): + _reset_state_for_tests() + yield + _reset_state_for_tests() + + +@pytest.fixture +def hermes_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + home = tmp_path / "hermes_home" + home.mkdir() + for var in ( + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "OPENROUTER_API_KEY", + "NOUS_API_KEY", + ): + monkeypatch.delenv(var, raising=False) + return home + + +def _write_config(home: Path, body: str) -> None: + (home / "config.yaml").write_text(textwrap.dedent(body).lstrip()) + + +def _write_nous_pool( + home: Path, + *, + access_token: str = "oauth-tok", + refresh_token: str | None = "refresh-tok", + oauth_expires_at: str | None = "2026-12-01T00:00:00+00:00", + agent_key: str | None = None, + agent_key_expires_at: str | None = None, + inference_base_url: str | None = None, + extra: dict | None = None, +) -> None: + entry: dict = { + "access_token": access_token, + "priority": 0, + } + if refresh_token is not None: + entry["refresh_token"] = refresh_token + if oauth_expires_at is not None: + entry["expires_at"] = oauth_expires_at + if agent_key is not None: + entry["agent_key"] = agent_key + if agent_key_expires_at is not None: + entry["agent_key_expires_at"] = agent_key_expires_at + if inference_base_url is not None: + entry["inference_base_url"] = inference_base_url + if extra: + entry.update(extra) + (home / "auth.json").write_text( + json.dumps({"credential_pool": {"nous": [entry]}}) + ) + + +# --------------------------------------------------------------------------- +# OAuth-managed flow: pool entry has refresh_token → NousLM factory +# 
--------------------------------------------------------------------------- + + +class TestNousResolutionWithOAuth: + def test_oauth_pool_entry_returns_factory(self, hermes_home): + _write_config( + hermes_home, + """ + model: + default: Hermes-4-405B + provider: nous + """, + ) + # Use a future agent_key so the factory's initial mint doesn't + # actually fire when the test calls factory() — keeps this test + # purely about the resolver's wiring. + _write_nous_pool( + hermes_home, + agent_key="fresh-key", + agent_key_expires_at="2026-12-01T00:00:00+00:00", + ) + lm = resolve_default_lm(role="optimizer", hermes_home=hermes_home) + assert isinstance(lm, ResolvedLM) + assert lm.model == "openai/Hermes-4-405B" + assert lm.lm_kwargs == {} + assert lm.lm_factory is not None + assert lm.provider_hint == "nous" + + def test_factory_constructs_nous_lm_with_oauth_state(self, hermes_home): + _write_config( + hermes_home, + """ + model: + default: Hermes-4-405B + provider: nous + """, + ) + _write_nous_pool( + hermes_home, + access_token="real-oauth-tok", + refresh_token="real-refresh-tok", + agent_key="initial-agent-key", + agent_key_expires_at="2026-12-01T00:00:00+00:00", + ) + resolved = resolve_default_lm(role="optimizer", hermes_home=hermes_home) + nous_lm = resolved.lm_factory() + assert isinstance(nous_lm, NousLM) + assert nous_lm._shared_state.access_token == "real-oauth-tok" + assert nous_lm._shared_state.refresh_token == "real-refresh-tok" + assert nous_lm._shared_state.agent_key == "initial-agent-key" + + def test_custom_inference_base_url_flows_through(self, hermes_home): + _write_config( + hermes_home, + """ + model: + default: Hermes-4-405B + provider: nous + """, + ) + _write_nous_pool( + hermes_home, + agent_key="fresh-key", + agent_key_expires_at="2026-12-01T00:00:00+00:00", + inference_base_url="https://custom-nous.example.com/v1", + ) + resolved = resolve_default_lm(role="optimizer", hermes_home=hermes_home) + nous_lm = resolved.lm_factory() + assert 
nous_lm.kwargs["api_base"] == "https://custom-nous.example.com/v1" + + +# --------------------------------------------------------------------------- +# Fallback paths +# --------------------------------------------------------------------------- + + +class TestNousResolutionFallbacks: + def test_pool_entry_without_refresh_token_falls_back_to_direct( + self, hermes_home + ): + # Env-var-style: pool has only access_token, no refresh_token. + # The resolver must fall through to the existing OpenAI-wire + # direct-pass-through path so a NOUS_API_KEY user keeps working. + _write_config( + hermes_home, + """ + model: + default: Hermes-4-405B + provider: nous + """, + ) + _write_nous_pool( + hermes_home, + access_token="bare-api-key", + refresh_token=None, + oauth_expires_at=None, + ) + resolved = resolve_default_lm(role="optimizer", hermes_home=hermes_home) + # Direct pass-through path: openai/, api_base + api_key in lm_kwargs, + # no factory. + assert resolved.lm_factory is None + assert resolved.model == "openai/Hermes-4-405B" + assert resolved.lm_kwargs.get("api_key") == "bare-api-key" + + def test_missing_pool_entry_surfaces_recovery_hint(self, hermes_home): + _write_config( + hermes_home, + """ + model: + default: Hermes-4-405B + provider: nous + """, + ) + # No auth.json written → no credential pool. + with pytest.raises(HermesProviderError, match="hermes model"): + resolve_default_lm(role="optimizer", hermes_home=hermes_home) + + def test_empty_access_token_in_oauth_entry_surfaces_recovery( + self, hermes_home + ): + _write_config( + hermes_home, + """ + model: + default: Hermes-4-405B + provider: nous + """, + ) + # Has refresh_token (OAuth-managed signal) but no access_token. 
+ _write_nous_pool( + hermes_home, + access_token="", + refresh_token="refresh-tok", + ) + with pytest.raises(HermesProviderError, match="hermes model"): + resolve_default_lm(role="optimizer", hermes_home=hermes_home) + + def test_no_model_default_for_oauth_path_surfaces_actionable( + self, hermes_home + ): + _write_config( + hermes_home, + """ + model: + provider: nous + """, + ) + _write_nous_pool(hermes_home) + with pytest.raises(HermesProviderError, match="model.default"): + resolve_default_lm(role="optimizer", hermes_home=hermes_home) + + +# --------------------------------------------------------------------------- +# Pool exhaustion regression +# --------------------------------------------------------------------------- + + +class TestPoolExhaustionRespected: + def test_exhausted_entry_skipped_with_future_reset(self, hermes_home): + # The existing _is_pool_entry_usable logic skips entries Hermes + # marked exhausted with a future cooldown. Confirm it still + # applies to the Nous OAuth path — should fall through to the + # missing-entry error. + _write_config( + hermes_home, + """ + model: + default: Hermes-4-405B + provider: nous + """, + ) + _write_nous_pool( + hermes_home, + extra={ + "last_status": "exhausted", + "last_error_reset_at": time.time() + 3600, # 1h in future + }, + ) + with pytest.raises(HermesProviderError, match="hermes model"): + resolve_default_lm(role="optimizer", hermes_home=hermes_home) From e53a4459907ac587ae1ad043795b0f9ee02313d7 Mon Sep 17 00:00:00 2001 From: Justin Ramos Date: Fri, 15 May 2026 09:09:05 -0600 Subject: [PATCH 4/5] docs+chore: Nous Portal section + manual smoke harness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docs/model_resolution.md: * New "Nous Portal OAuth + agent_key" section paralleling the Codex section. 
Documents the two-stage credential model, the in-memory refresh + mint flow, the HERMES_PORTAL_BASE_URL override knob, the in-memory-only posture, and the env-var-fallthrough behavior for pool entries without a refresh_token. * Future-work list trimmed: removes the now-shipped non-Codex OAuth bullet; rewords what remains so it's clear which providers are intentionally out of scope (Qwen / Spotify / Gemini). tests/manual/nous_smoke.py (NEW): Runnable mock-server smoke that validates the Nous wire flow without needing a real Nous Portal account. Spins up a stdlib http.server pretending to be portal.nousresearch.com, drives a real NousLM (and through it, a real LiteLLM call) against it, asserts on five scenarios: 1. Initial mint: agent-key POST carries the seed access_token as Bearer; min_ttl_seconds=1800 in body. 2. OAuth-expiring → refresh-then-mint: confirms call ordering and that the mint POST uses the REFRESHED access_token (proves the sequencing isn't backwards). 3. Inference uses the minted agent_key: the inference POST's Authorization header is the MINTED key, not the OAuth token — this is the headline bug the whole PR fixes. 4. Mid-run inference 401: forward's exception handler force-re-mints and retries; the recorded HTTP exchange shows mint→infer(401)→ re-mint→infer(200) with two distinct minted keys. 5. OAuth refresh invalid_grant → HermesProviderError with the `hermes model` recovery hint. Smoke uses cache=False + num_retries=0 to expose the underlying network behavior — DSPy cache would otherwise leak prior responses across scenarios and LiteLLM's internal retry-on-401 would mask our own re-mint logic. Comments explain both choices. Not part of CI (heavyweight; spins up a server). Documented in the Nous setup section as the recommended way to gain confidence in the Nous flow when a real Portal account isn't available. 
--- docs/model_resolution.md | 32 ++- tests/manual/nous_smoke.py | 455 +++++++++++++++++++++++++++++++++++++ 2 files changed, 486 insertions(+), 1 deletion(-) create mode 100644 tests/manual/nous_smoke.py diff --git a/docs/model_resolution.md b/docs/model_resolution.md index c739f333..18277da1 100644 --- a/docs/model_resolution.md +++ b/docs/model_resolution.md @@ -140,6 +140,36 @@ Refresh is **in-memory only** — the framework does not write back to `~/.herme **What's not supported:** streaming via the Responses endpoint (evolution doesn't stream), Codex-specific reasoning-effort overrides (DSPy's defaults work for gpt-5-class), and tool-call message conversion beyond what DSPy's `_convert_chat_request_to_responses_request` already handles. If a Codex 401 surfaces during a run, the standard auth-error panel renders with the `hermes auth add openai-codex` recovery hint. +## Nous Portal OAuth + agent_key + +Nous Portal uses a two-stage credential model that's different from every other provider: + +1. **OAuth access_token** (long-lived, days). Refreshable via the standard `refresh_token` grant. +2. **agent_key** (short-lived, ~30 minutes). Minted from the access_token via a Nous-specific `POST /api/oauth/agent-key`. The inference endpoint requires the **agent_key** as Bearer — not the access_token. + +Run `hermes model` and select Nous Portal to populate `~/.hermes/auth.json` with both. Then point `config.yaml` at Nous: + +```yaml +# ~/.hermes/config.yaml +model: + default: Hermes-4-405B + provider: nous +``` + +When the resolver detects a Nous credential pool entry with a `refresh_token` (signals OAuth-managed flow), the framework instantiates a `NousLM` subclass that: + +1. **Mints a fresh agent_key at preflight time** by POSTing to `{portal}/api/oauth/agent-key` with the OAuth access_token as Bearer. +2. **Refreshes the OAuth access_token in-memory** when it's within 120s of expiry — POSTed to `{portal}/api/oauth/token` with the standard refresh_token grant. 
Mirrors Hermes's own refresh-first-then-mint sequencing at `hermes_cli/auth.py:3061-3193`. +3. **Re-mints on inference 401** (mid-run agent_key revocation or expiration). The four LM roles (optimizer, reflection, eval, judge) coordinate through a shared lock so a four-thread evolution doesn't race the portal's single-use refresh-token rotation. + +The portal URL is overridable via `HERMES_PORTAL_BASE_URL` (Hermes's own env var name; sharing keeps configs portable for stage / mock setups). + +Refresh + mint state is **in-memory only** — the framework never writes back to `~/.hermes/auth.json`. For evolution sessions running longer than the on-disk agent_key TTL (~30 minutes since the last `hermes model`), the in-process refresh handles it. For multi-day sessions, periodic `hermes model` keeps the on-disk store fresh. + +**What's not supported:** auxiliary endpoints (vision / web-extract / session-search models from `auxiliary.*` config), streaming, and `auth.json` writeback. Pool entries without `refresh_token` (env-var-style `NOUS_API_KEY` setups) fall through to the existing direct-pass-through path — note that path probably doesn't actually work for Nous inference (the access_token isn't a valid Bearer), but we don't try to "upgrade" those users silently. + +A runnable smoke harness at `tests/manual/nous_smoke.py` validates the Nous wire flow against a local mock portal (no real Nous Portal account required). Run via `uv run python tests/manual/nous_smoke.py`. + ## Per-role overrides When your provider exposes multiple models, you can pick a different one per role to manage cost. Common pattern: a frontier model for the optimizer + reflection LMs (where reasoning matters), a cheaper model for eval + judge (where you'll make many calls): @@ -236,7 +266,7 @@ The framework defaults all four roles to Hermes's single `model.default`. 
To use This module currently does not: -- Refresh expired OAuth tokens for non-Codex providers (delegated to `hermes auth add ` / `hermes model`; Codex tokens refresh in-memory — see [OpenAI Codex Responses API](#openai-codex-responses-api)) - Honor `auxiliary.*` provider config from `config.yaml` (Hermes's vision/web-extract/session-search routing) +- Refresh OAuth tokens for Qwen, Spotify, or Google Gemini providers (Codex and Nous Portal are handled in-memory — see their dedicated sections above; the other OAuth providers in Hermes don't have demand from the evolution use case yet) The slim resolver lives at `evolution/core/hermes_provider.py`. The mapping table is sourced from `hermes_cli/auth.py` constants — drift is possible; update by reference when Hermes adds providers. diff --git a/tests/manual/nous_smoke.py b/tests/manual/nous_smoke.py new file mode 100644 index 00000000..e52c1146 --- /dev/null +++ b/tests/manual/nous_smoke.py @@ -0,0 +1,455 @@ +"""Manual end-to-end smoke for the Nous Portal OAuth + agent_key flow. + +Why this exists: + We have unit tests for NousLM that mock httpx.Client at the Python + layer. Those catch shape bugs but don't validate that the real network + call we'd make against `portal.nousresearch.com` carries the right + headers, body, and bearer. The user has no Nous Portal account, so + we can't run a true end-to-end smoke against the real portal. + +What this script does: + Spin up a stdlib http.server on a random localhost port that pretends + to be the Nous portal. Routes: + POST /api/oauth/token — refresh_token grant + POST /api/oauth/agent-key — agent_key mint + POST /v1/chat/completions — OpenAI-compat inference (so the actual + inference call returns 200 too) + + Construct a NousLM via the real resolver pointed at the local server, + drive several scenarios (initial mint, OAuth refresh + mint, mid-run + 401 recovery), and assert the recorded HTTP exchange matches expected + shape. 
+ +How to run: + uv run python tests/manual/nous_smoke.py + + Prints a recorded-requests summary; exits 0 on success and 1 on any + failed assertion. Not part of CI — heavyweight (spins up a server) + and not needed on every commit. +""" + +from __future__ import annotations + +import json +import sys +import threading +import time +from collections import deque +from datetime import datetime, timezone, timedelta +from http.server import BaseHTTPRequestHandler, HTTPServer +from typing import Any, Dict, List, Optional + +import litellm + +# Override module-level URLs BEFORE importing nous_lm so the constants +# pick up the local server. Real callers would set HERMES_PORTAL_BASE_URL +# in their shell; we set it here for the in-process smoke. +import os +_PORT_HOLDER = {"port": 0} + + +# --------------------------------------------------------------------------- +# Mock portal server +# --------------------------------------------------------------------------- + + +class _RecordingHandler(BaseHTTPRequestHandler): + """Routes the three endpoints we care about and records every request.""" + + recorded: deque = deque() + # Behavior knobs flipped per scenario from the main thread. 
+ behavior: Dict[str, Any] = { + "refresh_status": 200, + "refresh_body": None, # set per scenario + "mint_status": 200, + "mint_body": None, + "mint_call_count": 0, + "mint_first_status": None, # 401 then 200, for the refresh-retry test + "infer_status": 200, + "infer_call_count": 0, + "infer_first_status": None, # 401 then 200, for the inference-retry test + } + + def log_message(self, format, *args): # silence default access logs + pass + + def _read_body(self) -> bytes: + length = int(self.headers.get("Content-Length", "0") or "0") + return self.rfile.read(length) if length > 0 else b"" + + def _record(self, body: bytes) -> Dict[str, Any]: + try: + parsed = json.loads(body) if body else {} + except json.JSONDecodeError: + parsed = body.decode("utf-8", errors="replace") + entry = { + "method": self.command, + "path": self.path, + "headers": {k: v for k, v in self.headers.items()}, + "body": parsed, + } + self.recorded.append(entry) + return entry + + def _respond(self, status: int, body: Dict[str, Any]) -> None: + payload = json.dumps(body).encode("utf-8") + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def do_POST(self): # noqa: N802 — http.server convention + body = self._read_body() + self._record(body) + + if self.path.endswith("/api/oauth/token"): + self._respond( + self.behavior["refresh_status"], + self.behavior["refresh_body"] + or { + "access_token": "REFRESHED-OAUTH", + "refresh_token": "REFRESHED-REFRESH", + "expires_in": 86400, + "token_type": "Bearer", + }, + ) + return + + if self.path.endswith("/api/oauth/agent-key"): + self.behavior["mint_call_count"] += 1 + # First-call override (used to simulate "stale OAuth → mint 401 → refresh + retry") + if ( + self.behavior["mint_first_status"] is not None + and self.behavior["mint_call_count"] == 1 + ): + self._respond( + self.behavior["mint_first_status"], + 
{"error": "invalid_token"}, + ) + return + future = datetime.now(tz=timezone.utc) + timedelta(seconds=1800) + self._respond( + self.behavior["mint_status"], + self.behavior["mint_body"] + or { + "api_key": f"MINTED-AGENT-KEY-{self.behavior['mint_call_count']}", + "key_id": "test-key-id", + "expires_at": future.strftime("%Y-%m-%dT%H:%M:%S+00:00"), + "expires_in": 1800, + "reused": False, + }, + ) + return + + if "/chat/completions" in self.path: + self.behavior["infer_call_count"] += 1 + if ( + self.behavior["infer_first_status"] is not None + and self.behavior["infer_call_count"] == 1 + ): + self._respond( + self.behavior["infer_first_status"], + {"error": {"code": "invalid_api_key", "message": "401"}}, + ) + return + self._respond( + self.behavior["infer_status"], + { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": int(time.time()), + "model": "test-model", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "OK"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}, + }, + ) + return + + self._respond(404, {"error": "unknown_route", "path": self.path}) + + +# --------------------------------------------------------------------------- +# Scenario harness +# --------------------------------------------------------------------------- + + +def _start_server() -> HTTPServer: + server = HTTPServer(("127.0.0.1", 0), _RecordingHandler) + _PORT_HOLDER["port"] = server.server_port + threading.Thread(target=server.serve_forever, daemon=True).start() + return server + + +def _reset_recordings(): + _RecordingHandler.recorded.clear() + _RecordingHandler.behavior.update( + { + "refresh_status": 200, + "refresh_body": None, + "mint_status": 200, + "mint_body": None, + "mint_call_count": 0, + "mint_first_status": None, + "infer_status": 200, + "infer_call_count": 0, + "infer_first_status": None, + } + ) + + +def _make_lm(*, port: int, **state): + """Construct a NousLM 
pointed at the local mock server.""" + from evolution.core.nous_lm import NousLM, _reset_state_for_tests + + _reset_state_for_tests() + base_url = f"http://127.0.0.1:{port}" + defaults = dict( + access_token="seed-oauth", + refresh_token="seed-refresh", + oauth_expires_at=time.time() + 86400, + agent_key=None, + agent_key_expires_at=None, + portal_base_url=base_url, + inference_base_url=f"{base_url}/v1", + # cache=False so each smoke scenario actually hits the wire — DSPy's + # response cache ignores api_key/api_base in the cache key, which + # would otherwise let one scenario's response leak into the next. + # num_retries=0 because LiteLLM's internal retry-on-401 would + # transparently recover before NousLM.forward's 401 handler sees + # the failure, masking our re-mint logic. + cache=False, + num_retries=0, + ) + defaults.update(state) + return NousLM(model="openai/test-model", **defaults) + + +def _summary(label: str) -> str: + lines = [f"\n=== {label} ==="] + for i, r in enumerate(_RecordingHandler.recorded): + auth = r["headers"].get("Authorization", "") + body_preview = ( + json.dumps(r["body"])[:80] if not isinstance(r["body"], str) else r["body"][:80] + ) + lines.append( + f" [{i}] {r['method']} {r['path']:30} auth={auth[:40]:40} body={body_preview}" + ) + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Scenarios +# --------------------------------------------------------------------------- + + +def scenario_initial_mint(port: int) -> List[str]: + """Fresh construction with no agent_key → one mint POST, no refresh.""" + failures = [] + _reset_recordings() + lm = _make_lm(port=port) + + if _RecordingHandler.behavior["mint_call_count"] != 1: + failures.append( + f"expected 1 mint call, got {_RecordingHandler.behavior['mint_call_count']}" + ) + refresh_calls = sum( + 1 for r in _RecordingHandler.recorded if r["path"].endswith("/api/oauth/token") + ) + if refresh_calls != 0: + failures.append( + 
f"OAuth refresh fired unnecessarily ({refresh_calls} call(s))" + ) + mint_record = next( + r for r in _RecordingHandler.recorded if r["path"].endswith("/api/oauth/agent-key") + ) + if mint_record["headers"].get("Authorization") != "Bearer seed-oauth": + failures.append( + f"mint POST should carry seed access_token as Bearer; got " + f"{mint_record['headers'].get('Authorization')}" + ) + if mint_record["body"].get("min_ttl_seconds") != 1800: + failures.append( + f"mint POST should request 1800s min TTL; got {mint_record['body']}" + ) + if not str(lm.kwargs["api_key"]).startswith("MINTED-AGENT-KEY"): + failures.append( + f"NousLM api_key should be the minted agent_key; got {lm.kwargs['api_key']}" + ) + + print(_summary("initial_mint")) + return failures + + +def scenario_oauth_expiring_refreshes_then_mints(port: int) -> List[str]: + """When OAuth is expiring AND no agent_key, expect refresh THEN mint + (in that order), with the mint POST using the refreshed access_token. + """ + failures = [] + _reset_recordings() + lm = _make_lm(port=port, oauth_expires_at=time.time() + 30) + + paths = [r["path"] for r in _RecordingHandler.recorded] + if not paths or not paths[0].endswith("/api/oauth/token"): + failures.append( + f"expected OAuth refresh first; got call sequence {paths}" + ) + if len(paths) < 2 or not paths[1].endswith("/api/oauth/agent-key"): + failures.append( + f"expected mint as second call; got call sequence {paths}" + ) + if len(_RecordingHandler.recorded) >= 2: + mint_record = _RecordingHandler.recorded[1] + # The refresh response in our mock returns access_token=REFRESHED-OAUTH; + # the mint POST must use it as Bearer (proves the refresh-then-mint + # ordering wires correctly). 
+ if mint_record["headers"].get("Authorization") != "Bearer REFRESHED-OAUTH": + failures.append( + f"mint POST should carry REFRESHED-OAUTH as Bearer; got " + f"{mint_record['headers'].get('Authorization')}" + ) + # Refresh response should also have rotated the refresh_token in shared state. + if lm._shared_state.refresh_token != "REFRESHED-REFRESH": + failures.append( + f"rotated refresh_token should be persisted; got " + f"{lm._shared_state.refresh_token}" + ) + + print(_summary("oauth_expiring_refreshes_then_mints")) + return failures + + +def scenario_inference_uses_minted_agent_key(port: int) -> List[str]: + """End-to-end: construct LM (mints), then make a real LiteLLM call. + The inference POST's Authorization header must be the MINTED agent_key + — proving we're not silently routing the OAuth access_token through + as the inference Bearer (the bug this whole PR fixes). + """ + failures = [] + _reset_recordings() + lm = _make_lm(port=port) + + try: + lm(messages=[{"role": "user", "content": "hello"}]) + except Exception as exc: + failures.append(f"inference call raised unexpectedly: {type(exc).__name__}: {exc}") + + infer_records = [r for r in _RecordingHandler.recorded if "/chat/completions" in r["path"]] + if not infer_records: + failures.append("no inference call recorded") + else: + auth = infer_records[0]["headers"].get("Authorization", "") + if not auth.startswith("Bearer MINTED-AGENT-KEY"): + failures.append( + f"inference Bearer should be the minted agent_key; got {auth}" + ) + + print(_summary("inference_uses_minted_agent_key")) + return failures + + +def scenario_inference_401_triggers_remint_and_retry(port: int) -> List[str]: + """Inference 401 (e.g., agent_key revoked mid-run) → force re-mint + via NousLM's forward 401 handler, then retry the inference once. 
+ """ + failures = [] + _reset_recordings() + _RecordingHandler.behavior["infer_first_status"] = 401 + lm = _make_lm(port=port) + + try: + lm(messages=[{"role": "user", "content": "hello"}]) + except Exception as exc: + failures.append(f"inference call raised after retry: {type(exc).__name__}: {exc}") + + infer_count = sum( + 1 for r in _RecordingHandler.recorded if "/chat/completions" in r["path"] + ) + mint_count = sum( + 1 for r in _RecordingHandler.recorded if r["path"].endswith("/api/oauth/agent-key") + ) + # Expect: 1 initial mint (constructor), 1 first inference (401), 1 force re-mint, 1 retry inference (200) + if infer_count != 2: + failures.append(f"expected 2 inference calls (1 fail + 1 retry); got {infer_count}") + if mint_count != 2: + failures.append(f"expected 2 mint calls (initial + force re-mint); got {mint_count}") + + print(_summary("inference_401_triggers_remint_and_retry")) + return failures + + +def scenario_oauth_invalid_grant_surfaces_error(port: int) -> List[str]: + """Refresh failure with invalid_grant must raise HermesProviderError + pointing operator at `hermes model`. 
+ """ + from evolution.core.hermes_provider import HermesProviderError + + failures = [] + _reset_recordings() + _RecordingHandler.behavior["refresh_status"] = 400 + _RecordingHandler.behavior["refresh_body"] = { + "error": "invalid_grant", + "error_description": "refresh token is no longer valid", + } + + raised = None + try: + _make_lm(port=port, oauth_expires_at=time.time() + 30) + except HermesProviderError as exc: + raised = str(exc) + + if raised is None: + failures.append("expected HermesProviderError; nothing raised") + elif "hermes model" not in raised: + failures.append(f"recovery hint missing from error: {raised}") + + print(_summary("oauth_invalid_grant_surfaces_error")) + return failures + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def main() -> int: + server = _start_server() + port = _PORT_HOLDER["port"] + print(f"Mock Nous Portal listening on http://127.0.0.1:{port}") + + # Suppress LiteLLM background telemetry chatter that pollutes the smoke + # output without affecting wire-level behavior. 
+ litellm.suppress_debug_info = True + + all_failures: List[str] = [] + for scenario in ( + scenario_initial_mint, + scenario_oauth_expiring_refreshes_then_mints, + scenario_inference_uses_minted_agent_key, + scenario_inference_401_triggers_remint_and_retry, + scenario_oauth_invalid_grant_surfaces_error, + ): + failures = scenario(port) + for f in failures: + all_failures.append(f"{scenario.__name__}: {f}") + + server.shutdown() + + print("\n" + "=" * 60) + if all_failures: + print(f"FAIL: {len(all_failures)} assertion(s) failed:") + for f in all_failures: + print(f" - {f}") + return 1 + print("PASS: All assertions passed.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From c58dfa0c810d35abce10b700b13210e262994765 Mon Sep 17 00:00:00 2001 From: Justin Ramos Date: Fri, 15 May 2026 09:44:38 -0600 Subject: [PATCH 5/5] fix(nous_lm): harden against silent failures surfaced in review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review pass found a handful of silent-failure paths and one factually wrong docstring; all addressed here. oauth_helpers.parse_iso_or_epoch: Reject bool, inf, nan, negative, and naive-datetime inputs that would silently produce wrong epoch values: * inf/nan: every skew check evaluates as "now >= inf" → False, so the token would be treated as eternally fresh and never refreshed * naive ISO: datetime.timestamp() interprets in the host's local TZ, silently corrupting the skew window by hours on non-UTC hosts * bool: subclass of int, would coerce True → 1.0 epoch seconds Module docstring also rewritten to describe actual current consumer (NousLM only — the previous "shared by Codex" claim was aspirational; Codex parses expires_at inline as a raw float and doesn't import this). 
nous_lm._refresh_oauth and _absorb_mint_response: expires_in fields now reject bool explicitly — without the guard, expires_in: True is accepted as 1 second (isinstance(True, int) is True in Python), triggering perpetual re-mint storms. Also: when both expires_at AND expires_in are absent or unusable, the code now logs a warning before falling through to the conservative TTL floor, so a portal protocol change that drops both fields is at least visible in the run log instead of silently caching a key for 30 minutes. nous_lm.NousLM.__init__: HERMES_PORTAL_BASE_URL and NOUS_INFERENCE_BASE_URL env vars are now read at instance time, not module-import time. Previously the docs advertised both as "overridable" but if anything imported nous_lm before the operator set the var, the override was silently ignored. The manual smoke harness already worked around this; this fix makes the documented behavior actually true. nous_lm.NousLM.forward / aforward: When the post-401 retry ALSO returns 401, wrap with HermesProviderError that names the recovery action ("OAuth grant may have been revoked; run hermes model"). Previously a bare litellm.AuthenticationError propagated with no signal that recovery had been attempted. nous_lm._SharedNousState.__post_init__: Reject construction with agent_key set but agent_key_expires_at None (or vice versa). The runtime path defensively treats this as "always re-mint" — surfacing the construction-time mistake loudly is cheaper than letting it cause silent re-mint storms in production. hermes_provider._maybe_resolve_nous_lm: When the credential pool entry has access_token but neither refresh_token NOR agent_key, raise HermesProviderError pointing at `hermes model` recovery. This is almost certainly a partial OAuth setup (interrupted hermes model run) that would otherwise let the caller fall through to direct pass-through and 401 against Nous's inference endpoint with no breadcrumb. 
Pool entries with agent_key set still fall through unchanged (genuine inference-only credentials). Comment-rot cleanup: Stripped six `hermes_cli/auth.py:NNNN-NNNN` line-number references from nous_lm.py and docs/model_resolution.md. Replaced with symbol references. Codex's nous_lm-equivalent (codex_lm.py) got this right and Nous followed the wrong precedent; aligning now. New test coverage (~20 cases): Refresh + mint malformed-JSON paths, network-error wrapping (httpx ConnectError), OAuth 403 / mint 403 status-code-triggers-relogin, agent_key field-name alias, ISO expires_at parsing, bool expires_in hits floor, async-path 401 recovery (sync had it; async test previously mocked the thing under test), partial-OAuth-setup error in the resolver, _SharedNousState __post_init__ guard, and the new parse_iso_or_epoch rejection paths. --- docs/model_resolution.md | 2 +- evolution/core/hermes_provider.py | 23 +- evolution/core/nous_lm.py | 149 +++++++++---- evolution/core/oauth_helpers.py | 54 ++++- tests/core/test_nous_lm.py | 334 +++++++++++++++++++++++++++++- tests/core/test_nous_provider.py | 34 ++- 6 files changed, 535 insertions(+), 61 deletions(-) diff --git a/docs/model_resolution.md b/docs/model_resolution.md index 18277da1..10823482 100644 --- a/docs/model_resolution.md +++ b/docs/model_resolution.md @@ -159,7 +159,7 @@ model: When the resolver detects a Nous credential pool entry with a `refresh_token` (signals OAuth-managed flow), the framework instantiates a `NousLM` subclass that: 1. **Mints a fresh agent_key at preflight time** by POSTing to `{portal}/api/oauth/agent-key` with the OAuth access_token as Bearer. -2. **Refreshes the OAuth access_token in-memory** when it's within 120s of expiry — POSTed to `{portal}/api/oauth/token` with the standard refresh_token grant. Mirrors Hermes's own refresh-first-then-mint sequencing at `hermes_cli/auth.py:3061-3193`. +2. 
**Refreshes the OAuth access_token in-memory** when it's within 120s of expiry — POSTed to `{portal}/api/oauth/token` with the standard refresh_token grant. Mirrors Hermes's own refresh-first-then-mint sequencing in `hermes_cli/auth.py`. 3. **Re-mints on inference 401** (mid-run agent_key revocation or expiration). The four LM roles (optimizer, reflection, eval, judge) coordinate through a shared lock so a four-thread evolution doesn't race the portal's single-use refresh-token rotation. The portal URL is overridable via `HERMES_PORTAL_BASE_URL` (Hermes's own env var name; sharing keeps configs portable for stage / mock setups). diff --git a/evolution/core/hermes_provider.py b/evolution/core/hermes_provider.py index 8c7c66f9..0a794dba 100644 --- a/evolution/core/hermes_provider.py +++ b/evolution/core/hermes_provider.py @@ -753,13 +753,24 @@ def _maybe_resolve_nous_lm( ) refresh_token = _str_or_none(pool_entry.get("refresh_token")) + agent_key = _str_or_none(pool_entry.get("agent_key")) if not refresh_token: - # OAuth flow not in play; let the caller use the existing - # env-var/access_token-as-Bearer path. Note this path will still - # 401 against Nous's inference endpoint when the field actually - # holds the OAuth access_token (Nous needs the minted agent_key), - # but we can't tell from here whether the user intends OAuth or - # env-var, so the conservative posture is "don't change behavior". + # An entry with agent_key set is plausibly env-var-style or + # hand-edited inference-only — let it fall through to the + # generic OpenAI-wire handler with whatever Bearer it carries. + # An entry with NEITHER refresh_token NOR agent_key is almost + # certainly a partial OAuth setup (interrupted hermes model run, + # or the portal handed back access_token only). Inference would + # 401 with no breadcrumb pointing at the missing credentials, so + # raise here with a specific recovery hint. 
+ if agent_key is None: + raise HermesProviderError( + "~/.hermes/auth.json credential_pool[\"nous\"] entry has " + "an access_token but no refresh_token or agent_key — " + "looks like a partial OAuth setup. Run `hermes model` " + "and select Nous Portal to complete authentication, or " + f"pass --{role}-model to bypass Hermes resolution." + ) return None access_token = _str_or_none(pool_entry.get("access_token")) diff --git a/evolution/core/nous_lm.py b/evolution/core/nous_lm.py index ec2df197..8e7feca5 100644 --- a/evolution/core/nous_lm.py +++ b/evolution/core/nous_lm.py @@ -10,14 +10,14 @@ (``inference-api.nousresearch.com``) requires the **agent_key** as Bearer — not the access_token. -This module mirrors Hermes's own ``resolve_nous_runtime_credentials`` flow -at ``hermes_cli/auth.py:3061-3193``: refresh the OAuth token first if -expiring, then mint a fresh agent_key from it. On inference 401, force -re-mint and retry once. State is shared across LM instances via -``_STATE_BY_KEY`` so the four LM roles (optimizer, reflection, eval, -judge) coordinate through one lock and one mint per refresh window — -without this, four parallel workers entering the skew window would each -mint and three would race the portal. +Mirrors Hermes's ``resolve_nous_runtime_credentials`` flow in +``hermes_cli/auth.py``: refresh the OAuth token first if expiring, then +mint a fresh agent_key from it. On inference 401, force re-mint and retry +once. State is shared across LM instances via ``_STATE_BY_KEY`` so the +four LM roles (optimizer, reflection, eval, judge) coordinate through +one lock and one mint per refresh window — without this, four parallel +workers entering the skew window would each mint and three would race +the portal's single-use refresh-token rotation. In-memory only — no auth.json writeback. 
Long evolutions (>30 min on a fresh agent_key) refresh in-process, but the on-disk store stays at @@ -27,6 +27,7 @@ from __future__ import annotations +import logging import os import threading import time @@ -40,26 +41,26 @@ from evolution.core.hermes_provider import HermesProviderError from evolution.core.oauth_helpers import parse_iso_or_epoch +_log = logging.getLogger(__name__) -# Mirrors hermes_cli/auth.py:67-72 — reading the same constants keeps -# us in lockstep with Hermes's own behavior. Override via env vars when -# pointing at a stage portal or local mock for tests. -NOUS_PORTAL_BASE_URL = os.getenv( - "HERMES_PORTAL_BASE_URL", "https://portal.nousresearch.com" -) -NOUS_INFERENCE_BASE_URL = os.getenv( - "NOUS_INFERENCE_BASE_URL", "https://inference-api.nousresearch.com/v1" -) + +# Hardcoded defaults; the constructor reads ``HERMES_PORTAL_BASE_URL`` and +# ``NOUS_INFERENCE_BASE_URL`` env vars at instance time so tests and stage +# setups can override them post-import. Module-level capture would freeze +# the values at first import, before any test or operator could intervene. +NOUS_PORTAL_BASE_URL = "https://portal.nousresearch.com" +NOUS_INFERENCE_BASE_URL = "https://inference-api.nousresearch.com/v1" NOUS_OAUTH_CLIENT_ID = "hermes-cli" # Refresh OAuth access tokens 2 minutes before they expire and re-mint -# the inference agent_key 2 minutes before it expires. Hermes uses the -# same constants at hermes_cli/auth.py:71-72; matching keeps multi-process -# workloads from racing each other onto the wire. +# the inference agent_key 2 minutes before it expires. Mirrors Hermes's +# ``ACCESS_TOKEN_REFRESH_SKEW_SECONDS`` so multi-process workloads don't +# race each other onto the wire on different cadences. OAUTH_REFRESH_SKEW_SECONDS = 120 AGENT_KEY_REFRESH_SKEW_SECONDS = 120 # Ask the portal for at least 30 minutes of agent_key TTL on each mint; -# the portal is free to grant more. Mirrors DEFAULT_AGENT_KEY_MIN_TTL. +# the portal is free to grant more. 
Mirrors Hermes's +# ``DEFAULT_AGENT_KEY_MIN_TTL_SECONDS``. AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 @@ -83,6 +84,19 @@ class _SharedNousState: agent_key_expires_at: Optional[float] lock: threading.Lock + def __post_init__(self) -> None: + # An agent_key without an expiry trips _agent_key_needs_mint into + # "always re-mint" mode, which is defensive but masks the + # construction-time mistake of seeding partial state. Pin the + # invariant so the failure surfaces loudly at construction. + if (self.agent_key and self.agent_key_expires_at is None) or ( + self.agent_key_expires_at is not None and not self.agent_key + ): + raise ValueError( + "_SharedNousState: agent_key and agent_key_expires_at " + "must be set together (or both None)" + ) + def __deepcopy__(self, memo): # NousLM uses dspy.LM.copy() (which deepcopies the whole instance) # to apply role-specific kwargs. Locks aren't deep-copyable, and @@ -148,12 +162,27 @@ def __init__( inference_base_url: Optional[str] = None, **kwargs: Any, ) -> None: - kwargs["api_base"] = inference_base_url or NOUS_INFERENCE_BASE_URL + # Resolve URLs at construction time (not module-import time) so + # tests and stage setups can override via env vars after the + # framework is loaded. ``HERMES_PORTAL_BASE_URL`` is Hermes's own + # variable name — sharing keeps a single ``export`` portable. 
+ effective_portal = ( + portal_base_url + or os.getenv("HERMES_PORTAL_BASE_URL") + or NOUS_PORTAL_BASE_URL + ) + effective_inference = ( + inference_base_url + or os.getenv("NOUS_INFERENCE_BASE_URL") + or NOUS_INFERENCE_BASE_URL + ) + + kwargs["api_base"] = effective_inference kwargs["api_key"] = agent_key or "" super().__init__(model=model, **kwargs) - self._portal_base_url = portal_base_url or NOUS_PORTAL_BASE_URL + self._portal_base_url = effective_portal # The lookup key for shared state — falls back to id(self) so test # scenarios with synthetic creds get per-instance isolation rather @@ -282,18 +311,30 @@ def _refresh_oauth(self) -> None: "Run `hermes model` to re-authenticate." ) - # Refresh tokens may rotate (single-use semantics). Honor the new - # one if present; missing means the portal kept the original valid. + # The Nous portal enforces single-use refresh-token rotation; + # honor any rotated token in the response. Missing means the + # portal kept the original valid. new_refresh = payload.get("refresh_token") if isinstance(new_refresh, str) and new_refresh.strip(): self._shared_state.refresh_token = new_refresh.strip() expires_in = payload.get("expires_in") - if isinstance(expires_in, (int, float)) and expires_in > 0: + if ( + isinstance(expires_in, (int, float)) + and not isinstance(expires_in, bool) + and expires_in > 0 + ): self._shared_state.oauth_expires_at = time.time() + float(expires_in) else: # Conservative 1h fallback if the field is missing — keeps the - # next call from racing to the wire again immediately. + # next call from racing to the wire again immediately. Logged + # so a portal protocol change that drops expires_in is at + # least visible in the run log. + _log.warning( + "Nous OAuth refresh response had no usable expires_in; " + "using 1h fallback. 
payload keys: %s", + sorted(payload.keys()), + ) self._shared_state.oauth_expires_at = time.time() + 3600.0 self._shared_state.access_token = new_access.strip() @@ -305,9 +346,9 @@ def _refresh_oauth(self) -> None: def _mint_agent_key(self, *, allow_oauth_retry: bool) -> None: """POST agent-key mint; on 401, optionally refresh OAuth and retry. - Mirrors Hermes's mint-401-triggers-refresh-retry pattern at - ``hermes_cli/auth.py:3122-3174``. ``allow_oauth_retry`` is True on - the first call from ``_ensure_credentials``; the recursive retry + Mirrors Hermes's mint-401-triggers-refresh-retry pattern in + ``hermes_cli/auth.py``. ``allow_oauth_retry`` is True on the + first call from ``_ensure_credentials``; the recursive retry passes False to bound the recursion at one OAuth refresh. """ try: @@ -365,10 +406,24 @@ def _absorb_mint_response(self, response: httpx.Response) -> None: new_expires_at = parse_iso_or_epoch(payload.get("expires_at")) if new_expires_at is None: expires_in = payload.get("expires_in") - if isinstance(expires_in, (int, float)) and expires_in > 0: + if ( + isinstance(expires_in, (int, float)) + and not isinstance(expires_in, bool) + and expires_in > 0 + ): new_expires_at = time.time() + float(expires_in) else: - # Conservative — assume the floor TTL we asked for. + # Conservative — assume the floor TTL we asked for. Log + # so a portal protocol change that drops both expiry + # fields is at least visible in the run log; otherwise + # we silently cache a key for 30 minutes regardless of + # what the server intended. + _log.warning( + "Nous mint response had no usable expires_at or " + "expires_in; using AGENT_KEY_MIN_TTL_SECONDS " + "fallback. 
payload keys: %s", + sorted(payload.keys()), + ) new_expires_at = time.time() + AGENT_KEY_MIN_TTL_SECONDS self._shared_state.agent_key = agent_key.strip() @@ -385,10 +440,20 @@ def forward(self, prompt=None, messages=None, **kwargs): # type: ignore[overrid except litellm.AuthenticationError: # Cached agent_key is dead despite passing the skew check. # Force re-mint (which may also refresh OAuth on its own 401) - # and retry once. A second 401 propagates so the auth-abort - # sentinel + cost-ceiling path catches it. + # and retry once. If the freshly-minted key is also rejected + # the OAuth grant has likely been revoked entirely; surface + # that explicitly so the operator gets the right recovery + # hint instead of a generic 401. self._force_remint() - return super().forward(prompt=prompt, messages=messages, **kwargs) + try: + return super().forward(prompt=prompt, messages=messages, **kwargs) + except litellm.AuthenticationError as exc: + raise HermesProviderError( + "Nous Portal inference rejected a freshly-minted " + "agent_key after an automatic re-mint. The OAuth " + "grant may have been revoked. Run `hermes model` " + "and select Nous Portal to re-authenticate." + ) from exc async def aforward(self, prompt=None, messages=None, **kwargs): # type: ignore[override] self._ensure_credentials() @@ -396,7 +461,17 @@ async def aforward(self, prompt=None, messages=None, **kwargs): # type: ignore[ return await super().aforward(prompt=prompt, messages=messages, **kwargs) except litellm.AuthenticationError: self._force_remint() - return await super().aforward(prompt=prompt, messages=messages, **kwargs) + try: + return await super().aforward( + prompt=prompt, messages=messages, **kwargs + ) + except litellm.AuthenticationError as exc: + raise HermesProviderError( + "Nous Portal inference rejected a freshly-minted " + "agent_key after an automatic re-mint. The OAuth " + "grant may have been revoked. Run `hermes model` " + "and select Nous Portal to re-authenticate." 
+ ) from exc # ---------------------------------------------------------------------- @@ -410,7 +485,7 @@ async def aforward(self, prompt=None, messages=None, **kwargs): # type: ignore[ def _format_oauth_error(response: httpx.Response) -> str: """Translate a non-200 OAuth refresh response into an actionable user - message. Mirrors hermes_cli/auth.py:2595-2624. + message. Mirrors the OAuth-error classification in ``hermes_cli/auth.py``. """ code, detail = _parse_error_body(response) diff --git a/evolution/core/oauth_helpers.py b/evolution/core/oauth_helpers.py index 7efbe5e6..8ac15fa2 100644 --- a/evolution/core/oauth_helpers.py +++ b/evolution/core/oauth_helpers.py @@ -1,12 +1,14 @@ -"""Shared OAuth helpers used by Codex and Nous LM wrappers. +"""OAuth helper utilities for NousLM. -Kept as a small standalone module so the next OAuth provider that needs -in-memory refresh has somewhere obvious to drop shared utilities without -bloating either provider's LM file. +Currently only consumed by ``NousLM`` and ``_maybe_resolve_nous_lm``. +Lives as its own module so the next OAuth provider that needs in-memory +refresh has somewhere obvious to drop shared utilities without bloating +either consumer's file. """ from __future__ import annotations +import math from datetime import datetime from typing import Any, Optional @@ -22,13 +24,19 @@ def parse_iso_or_epoch(value: Any) -> Optional[float]: * Older or hand-edited entries may omit it entirely. Returns the equivalent Unix epoch float, or None when the value is - missing, malformed, or has no parseable shape. Callers treat None as - "unknown" — typically meaning "trigger a refresh" defensively. + missing, malformed, has no parseable shape, or fails sanity checks + (non-finite, negative, or naive datetime that would silently pull + in the host TZ instead of the intended UTC). + + Callers treat None as "unknown" — typically meaning "trigger a + refresh" defensively. 
""" - if value is None: + if value is None or isinstance(value, bool): + # bool is a subclass of int; reject before the numeric path so a + # stray ``True`` isn't silently turned into 1.0 epoch seconds. return None if isinstance(value, (int, float)): - return float(value) + return _validated(float(value)) if isinstance(value, str): s = value.strip() if not s: @@ -38,12 +46,36 @@ def parse_iso_or_epoch(value: Any) -> Optional[float]: if s.endswith("Z"): s = s[:-1] + "+00:00" try: - return datetime.fromisoformat(s).timestamp() + dt = datetime.fromisoformat(s) except ValueError: - pass + dt = None + if dt is not None: + if dt.tzinfo is None: + # Naive datetime: Python's .timestamp() would interpret + # in the host's local timezone, which silently corrupts + # the skew window by hours on non-UTC hosts. Reject so + # the caller treats it as "unknown" rather than producing + # a confidently-wrong epoch. + return None + return _validated(dt.timestamp()) # Numeric-looking string ("1747299600") — treat as epoch seconds. try: - return float(s) + return _validated(float(s)) except ValueError: return None return None + + +def _validated(epoch: float) -> Optional[float]: + """Reject inf, nan, and negative epoch values. + + ``inf`` would make every skew check evaluate ``something >= inf`` → + False, so the token would be treated as eternally fresh and never + refreshed (silent "wrong answer" failure). ``nan`` has the same + failure mode (all comparisons against nan are False). Negatives are + structurally absurd for an expires_at and most likely indicate a + parser bug upstream. 
+ """ + if not math.isfinite(epoch) or epoch < 0: + return None + return epoch diff --git a/tests/core/test_nous_lm.py b/tests/core/test_nous_lm.py index 156b8b35..25c16caa 100644 --- a/tests/core/test_nous_lm.py +++ b/tests/core/test_nous_lm.py @@ -18,7 +18,7 @@ import threading import time from concurrent.futures import ThreadPoolExecutor -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import httpx import litellm @@ -326,7 +326,11 @@ def test_forward_propagates_second_401(self): mock_cls.return_value = _mock_httpx_post( [_mock_response(json_body={"api_key": "remint", "expires_in": 1800})] ) - with pytest.raises(litellm.AuthenticationError): + # The second 401 (after re-mint) is wrapped as a + # HermesProviderError that names the recovery action ("OAuth + # grant may have been revoked"), so the operator gets a + # specific message instead of a bare 401. + with pytest.raises(HermesProviderError, match="re-mint"): lm.forward(messages=[{"role": "user", "content": "hi"}]) @@ -484,6 +488,298 @@ def test_format_mint_error_extracts_openai_shape(self): # --------------------------------------------------------------------------- +class TestResponseShapeEdgeCases: + """Coverage for protocol-rev edge cases that the unit tests originally + glossed over: malformed JSON, alternate field names, fallback TTLs, + and the bool-as-numeric trap. 
+ """ + + def test_refresh_response_missing_access_token_raises(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + json_body={"refresh_token": "x", "expires_in": 3600} + ) + ] + ) + with pytest.raises(HermesProviderError, match="missing access_token"): + NousLM( + model="openai/test-model", + access_token="stale", + refresh_token="r", + oauth_expires_at=time.time() + 30, # forces refresh + ) + + def test_refresh_response_malformed_json_raises(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(status_code=200, json_body=None)] # .json() raises + ) + with pytest.raises(HermesProviderError, match="invalid JSON"): + NousLM( + model="openai/test-model", + access_token="stale", + refresh_token="r", + oauth_expires_at=time.time() + 30, + ) + + def test_mint_response_missing_api_key_raises(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"expires_in": 1800})] + ) + with pytest.raises(HermesProviderError, match="missing api_key"): + NousLM( + model="openai/test-model", + access_token="oauth", + refresh_token="r", + oauth_expires_at=time.time() + 86400, + ) + + def test_mint_response_malformed_json_raises(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(status_code=200, json_body=None)] + ) + with pytest.raises(HermesProviderError, match="invalid JSON"): + NousLM( + model="openai/test-model", + access_token="oauth", + refresh_token="r", + oauth_expires_at=time.time() + 86400, + ) + + def test_mint_response_uses_agent_key_field_alias(self): + # The portal historically used `agent_key`; current shape is + # `api_key`. Forward/back compat: either should work. 
+ with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + json_body={ + "agent_key": "minted-via-old-shape", + "expires_in": 1800, + } + ) + ] + ) + lm = NousLM( + model="openai/test-model", + access_token="oauth", + refresh_token="r", + oauth_expires_at=time.time() + 86400, + ) + assert lm.kwargs["api_key"] == "minted-via-old-shape" + + def test_mint_response_iso_expires_at_parsed(self): + # The current portal returns expires_at as ISO 8601; verify the + # parser flows through without falling to the expires_in branch. + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + json_body={ + "api_key": "minted", + "expires_at": "2026-12-01T00:00:00+00:00", + # expires_in omitted on purpose — exercises the + # expires_at-only path (no expires_in fallback) + } + ) + ] + ) + lm = NousLM( + model="openai/test-model", + access_token="oauth", + refresh_token="r", + oauth_expires_at=time.time() + 86400, + ) + # 2026-12-01T00:00:00+00:00 → epoch 1796083200 + assert lm._shared_state.agent_key_expires_at == 1796083200.0 + + def test_mint_response_bool_expires_in_falls_to_floor(self): + # isinstance(True, int) is True in Python; without explicit + # bool exclusion, True would be cached as a 1-second TTL, + # triggering perpetual re-mint. The bool guard pushes us to + # the conservative 30-min floor instead. + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + json_body={"api_key": "minted", "expires_in": True} + ) + ] + ) + before = time.time() + lm = NousLM( + model="openai/test-model", + access_token="oauth", + refresh_token="r", + oauth_expires_at=time.time() + 86400, + ) + # 30-minute floor, not 1 second.
+ assert lm._shared_state.agent_key_expires_at >= before + 1700 + + +class TestNetworkErrorWrapping: + def test_refresh_httpx_error_wrapped(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + client = MagicMock() + client.__enter__.return_value = client + client.post.side_effect = httpx.ConnectError("dns failure") + mock_cls.return_value = client + with pytest.raises(HermesProviderError, match="OAuth refresh"): + NousLM( + model="openai/test-model", + access_token="stale", + refresh_token="r", + oauth_expires_at=time.time() + 30, + ) + + def test_mint_httpx_error_wrapped(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + client = MagicMock() + client.__enter__.return_value = client + client.post.side_effect = httpx.ConnectError("dns failure") + mock_cls.return_value = client + with pytest.raises(HermesProviderError, match="agent-key mint"): + NousLM( + model="openai/test-model", + access_token="oauth", + refresh_token="r", + oauth_expires_at=time.time() + 86400, + ) + + +class TestStatusCodeRelogin: + def test_oauth_403_triggers_relogin_even_with_unknown_code(self): + # _format_oauth_error special-cases 401/403 status to force the + # relogin message even when the JSON error code isn't in the + # known-relogin set. Catches portal returns like a tenant-disabled + # 403 with code="access_denied". + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + status_code=403, + json_body={"error": "access_denied"}, + ) + ] + ) + with pytest.raises(HermesProviderError, match="hermes model"): + NousLM( + model="openai/test-model", + access_token="stale", + refresh_token="r", + oauth_expires_at=time.time() + 30, + ) + + def test_mint_403_triggers_relogin(self): + # Mint 401 has the refresh-retry path; mint 403 doesn't (it + # signals tenant-side denial that won't recover from a fresh + # access_token). Should surface the relogin message directly. 
+ with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [ + _mock_response( + status_code=403, + json_body={"error": "tenant_suspended"}, + ) + ] + ) + with pytest.raises(HermesProviderError, match="hermes model"): + NousLM( + model="openai/test-model", + access_token="oauth", + refresh_token="r", + oauth_expires_at=time.time() + 86400, + ) + + +class TestAsyncForce401Recovery: + """The sync path has explicit retry + propagate tests; the async + path has neither equivalent. Mirror them to keep the two paths + from drifting silently. + """ + + def _build_lm_with_initial_mint(self): + with patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "first-mint", "expires_in": 1800})] + ) + return NousLM( + model="openai/test-model", + access_token="oauth-tok", + refresh_token="refresh-tok", + oauth_expires_at=time.time() + 86400, + ) + + def test_aforward_recovers_from_401_with_remint_and_retry(self): + lm = self._build_lm_with_initial_mint() + with patch("dspy.LM.aforward", new_callable=AsyncMock) as mock_super, \ + patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + err = litellm.AuthenticationError( + message="401", + llm_provider="openai", + model="openai/test-model", + ) + # AsyncMock with a list side_effect raises exceptions in + # sequence and returns non-exception items as the await value. 
+ mock_super.side_effect = [err, "ok"] + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "post-401-mint", "expires_in": 1800})] + ) + result = asyncio.run( + lm.aforward(messages=[{"role": "user", "content": "hi"}]) + ) + assert result == "ok" + assert mock_super.await_count == 2 + assert lm.kwargs["api_key"] == "post-401-mint" + + def test_aforward_propagates_second_401_as_hermes_provider_error(self): + lm = self._build_lm_with_initial_mint() + with patch("dspy.LM.aforward", new_callable=AsyncMock) as mock_super, \ + patch("evolution.core.nous_lm.httpx.Client") as mock_cls: + err = litellm.AuthenticationError( + message="401", llm_provider="openai", model="openai/test-model" + ) + mock_super.side_effect = [err, err] + mock_cls.return_value = _mock_httpx_post( + [_mock_response(json_body={"api_key": "remint", "expires_in": 1800})] + ) + with pytest.raises(HermesProviderError, match="re-mint"): + asyncio.run( + lm.aforward(messages=[{"role": "user", "content": "hi"}]) + ) + + +class TestSharedStateInvariants: + def test_post_init_rejects_partial_agent_key_state(self): + # _SharedNousState __post_init__ catches the construction-time + # mistake of seeding agent_key without a paired expires_at — + # which would otherwise silently force perpetual re-mint. 
+ from evolution.core.nous_lm import _SharedNousState + + with pytest.raises(ValueError, match="set together"): + _SharedNousState( + access_token="x", + refresh_token="r", + oauth_expires_at=None, + agent_key="orphan-key", + agent_key_expires_at=None, + lock=threading.Lock(), + ) + with pytest.raises(ValueError, match="set together"): + _SharedNousState( + access_token="x", + refresh_token="r", + oauth_expires_at=None, + agent_key=None, + agent_key_expires_at=time.time() + 1800, + lock=threading.Lock(), + ) + + class TestParseIsoOrEpoch: def test_iso8601_with_offset(self): result = parse_iso_or_epoch("2026-05-15T10:30:00+00:00") @@ -512,3 +808,37 @@ def test_empty_string_returns_none(self): def test_garbage_returns_none(self): assert parse_iso_or_epoch("not-a-timestamp") is None + + def test_inf_returns_none(self): + # inf would silently make every skew check evaluate as + # "something >= inf" → False, so the token would be treated as + # eternally fresh. The validator must reject. + assert parse_iso_or_epoch(float("inf")) is None + assert parse_iso_or_epoch("inf") is None + assert parse_iso_or_epoch("Infinity") is None + + def test_nan_returns_none(self): + # All comparisons against nan are False — same eternal-freshness + # trap as inf. + assert parse_iso_or_epoch(float("nan")) is None + assert parse_iso_or_epoch("nan") is None + + def test_negative_returns_none(self): + # Structurally absurd for an expires_at; usually a parser bug + # upstream. Reject so the caller treats as "unknown". + assert parse_iso_or_epoch(-100) is None + assert parse_iso_or_epoch(-0.5) is None + + def test_naive_iso_returns_none(self): + # datetime.fromisoformat("2026-05-15T10:30:00") returns a naive + # datetime; .timestamp() then interprets in the host's local TZ, + # silently corrupting the skew window by hours on non-UTC hosts. + # The validator rejects naive datetimes so the caller treats as + # "unknown" rather than producing a confidently-wrong epoch. 
+ assert parse_iso_or_epoch("2026-05-15T10:30:00") is None + + def test_bool_returns_none(self): + # bool subclasses int; without an explicit reject, True/False + # would silently coerce to 1.0 / 0.0 epoch — meaningless. + assert parse_iso_or_epoch(True) is None + assert parse_iso_or_epoch(False) is None diff --git a/tests/core/test_nous_provider.py b/tests/core/test_nous_provider.py index 1ee4ec6f..f480edc9 100644 --- a/tests/core/test_nous_provider.py +++ b/tests/core/test_nous_provider.py @@ -162,12 +162,14 @@ def test_custom_inference_base_url_flows_through(self, hermes_home): class TestNousResolutionFallbacks: - def test_pool_entry_without_refresh_token_falls_back_to_direct( + def test_pool_entry_with_agent_key_no_refresh_falls_back_to_direct( self, hermes_home ): - # Env-var-style: pool has only access_token, no refresh_token. - # The resolver must fall through to the existing OpenAI-wire - # direct-pass-through path so a NOUS_API_KEY user keeps working. + # Hand-edited or inference-only entry: has access_token + agent_key + # but no refresh_token. The resolver must fall through to the + # existing OpenAI-wire direct-pass-through path. The agent_key + # presence signals "this is an inference-ready credential, not a + # partial OAuth setup." 
_write_config( hermes_home, """ @@ -181,6 +183,7 @@ def test_pool_entry_without_refresh_token_falls_back_to_direct( access_token="bare-api-key", refresh_token=None, oauth_expires_at=None, + agent_key="inference-ready-bearer", ) resolved = resolve_default_lm(role="optimizer", hermes_home=hermes_home) # Direct pass-through path: openai/, api_base + api_key in lm_kwargs, @@ -189,6 +192,29 @@ def test_pool_entry_without_refresh_token_falls_back_to_direct( assert resolved.model == "openai/Hermes-4-405B" assert resolved.lm_kwargs.get("api_key") == "bare-api-key" + def test_pool_entry_without_refresh_or_agent_key_raises(self, hermes_home): + # Partial OAuth setup: pool entry has access_token but no + # refresh_token AND no agent_key. Almost certainly an interrupted + # `hermes model` run. Raising here gives the operator a specific + # recovery hint instead of letting inference 401 with no breadcrumb. + _write_config( + hermes_home, + """ + model: + default: Hermes-4-405B + provider: nous + """, + ) + _write_nous_pool( + hermes_home, + access_token="oauth-only", + refresh_token=None, + oauth_expires_at=None, + agent_key=None, + ) + with pytest.raises(HermesProviderError, match="partial OAuth setup"): + resolve_default_lm(role="optimizer", hermes_home=hermes_home) + def test_missing_pool_entry_surfaces_recovery_hint(self, hermes_home): _write_config( hermes_home,