diff --git a/evolution/validation/hermes_runner.py b/evolution/validation/hermes_runner.py index 4a4dacd1..f93e118c 100644 --- a/evolution/validation/hermes_runner.py +++ b/evolution/validation/hermes_runner.py @@ -32,6 +32,36 @@ _FINAL_TEXT_TAIL_BYTES = 4096 DEFAULT_TASK_TIMEOUT_SECONDS = 120 +# Known LiteLLM provider prefixes the rest of the framework uses +# (DSPy / litellm convention: ``<provider>/<model>``). The hermes -m flag +# interprets the same shape as openrouter-style routing — passing +# ``openai/gpt-4o-mini`` silently switches base_url to openrouter.ai and +# breaks auth for direct-provider configs, producing a 0-turn session that +# the saturation pre-flight misreports as "validator too weak". We strip +# these prefixes at the hermes boundary so users get the behavior they +# expect when they pass the same model string they use elsewhere. +_LITELLM_PROVIDER_PREFIXES = ( + "openai/", + "anthropic/", + "azure/", + "gemini/", + "cohere/", + "bedrock/", + "mistral/", +) + + +def _strip_litellm_provider_prefix(model: str) -> str: + """Strip a known LiteLLM provider prefix from a model name. + + Returns ``model`` unchanged when no recognized prefix is present, so + openrouter-style routing through an unrecognized vendor still works. + """ + for prefix in _LITELLM_PROVIDER_PREFIXES: + if model.startswith(prefix): + return model[len(prefix):] + return model + class HermesAgentRunner: """Invoke ``hermes -z`` and parse the resulting session JSON. @@ -63,7 +93,22 @@ def __init__( # against a deliberately weaker agent model than the user's # daily-driver default — saturation on capable models hides # behavioral signal that a weaker model would expose. - self.model = model + # + # Normalize LiteLLM-style provider prefixes (``openai/``, etc.) + # before storing: hermes -m treats ``<vendor>/<model>`` as + # openrouter routing which silently switches the base_url. See + # ``_strip_litellm_provider_prefix`` for the full rationale. 
+ if model is not None: + normalized = _strip_litellm_provider_prefix(model) + if normalized != model: + logger.info( + "Stripped LiteLLM provider prefix from hermes -m model: " + "%r → %r (avoids accidental openrouter routing)", + model, normalized, + ) + self.model = normalized + else: + self.model = None def run(self, ctx: TaskRunContext) -> AgentRunResult: message = ctx.user_message diff --git a/tests/validation/test_hermes_runner.py b/tests/validation/test_hermes_runner.py index c56faa9e..d970c29e 100644 --- a/tests/validation/test_hermes_runner.py +++ b/tests/validation/test_hermes_runner.py @@ -19,10 +19,43 @@ from evolution.validation.agent_runner import TaskRunContext from evolution.validation.hermes_runner import ( HermesAgentRunner, + _strip_litellm_provider_prefix, parse_session_result, ) +class TestStripLitellmProviderPrefix: + """The hermes -m flag interprets '<vendor>/<model>' as openrouter-style + routing. Direct-provider users naturally pass litellm-formatted names + like 'openai/gpt-4o-mini' from elsewhere in the framework; this helper + normalizes them back to bare model names that hermes -m accepts.""" + + def test_strips_openai_prefix(self): + assert _strip_litellm_provider_prefix("openai/gpt-4o-mini") == "gpt-4o-mini" + + def test_strips_anthropic_prefix(self): + assert _strip_litellm_provider_prefix("anthropic/claude-opus-4-7") == "claude-opus-4-7" + + def test_strips_azure_prefix(self): + assert _strip_litellm_provider_prefix("azure/gpt-4") == "gpt-4" + + def test_strips_gemini_prefix(self): + assert _strip_litellm_provider_prefix("gemini/gemini-1.5-pro") == "gemini-1.5-pro" + + def test_leaves_bare_model_unchanged(self): + assert _strip_litellm_provider_prefix("gpt-4o-mini") == "gpt-4o-mini" + + def test_leaves_unknown_prefix_unchanged(self): + # Unknown providers pass through so openrouter-style routing through + # an unrecognized vendor still works. 
+ assert _strip_litellm_provider_prefix("custom-vendor/model-x") == "custom-vendor/model-x" + + def test_only_strips_first_prefix(self): + # Defensive: nested prefixes (e.g. openrouter passthrough) shouldn't be + # double-stripped — keep the second segment intact. + assert _strip_litellm_provider_prefix("openai/foo/bar") == "foo/bar" + + def _write_session(path: Path, messages: list[dict], **extra) -> None: payload = { "session_id": "test", @@ -346,6 +379,39 @@ def _fake_run(*args, **kwargs): assert "-m" not in captured["args"] assert captured["args"] == ["hermes", "-z", "hello"] + def test_litellm_prefix_stripped_before_minus_m(self, fixture_dir, tmp_path): + """Regression: hermes -m treats '<vendor>/<model>' as openrouter-style + routing (silently switches base_url to openrouter.ai and breaks auth + for direct-provider configs). Users naturally pass litellm-formatted + names like 'openai/gpt-4o-mini' from elsewhere in the framework, so + the runner must strip known litellm provider prefixes before -m to + avoid a silent 0-turn 'agent never ran' failure that misreports as + 'validator too weak' at the saturation pre-flight.""" + runner = HermesAgentRunner( + user_config_path=tmp_path / "nonexistent", + model="openai/gpt-4o-mini", + ) + captured: dict = {} + + def _fake_run(*args, **kwargs): + captured["args"] = args[0] if args else kwargs.get("args") + sandbox = Path(kwargs["env"]["HERMES_HOME"]) + (sandbox / "sessions").mkdir(exist_ok=True) + _write_session( + sandbox / "sessions" / "session_test.json", + [{"role": "assistant", "content": "ok"}], + ) + return type("CP", (), {"returncode": 0, "stdout": "", "stderr": ""})() + + with patch("evolution.validation.hermes_runner.subprocess.run", side_effect=_fake_run): + runner.run(TaskRunContext( + user_message="debug", + fixture_dir=fixture_dir, + )) + + assert captured["args"][:4] == ["hermes", "-m", "gpt-4o-mini", "-z"] + assert "openai/" not in captured["args"][2] + def test_skills_src_none_means_no_skills_dir_created(self, 
fixture_dir, tmp_path): """Tool-side runs (no skills_src) must not create an empty skills/ directory in the sandbox — keeps the legacy code path bit-for-bit."""