diff --git a/autobot-backend/chat_workflow/llm_handler.py b/autobot-backend/chat_workflow/llm_handler.py index 0739e2a7f..7287029a8 100644 --- a/autobot-backend/chat_workflow/llm_handler.py +++ b/autobot-backend/chat_workflow/llm_handler.py @@ -17,6 +17,9 @@ from autobot_shared.http_client import get_http_client from constants.model_constants import ModelConstants from dependencies import global_config_manager +from extensions.base import HookContext +from extensions.hooks import HookPoint +from extensions.manager import get_extension_manager from prompt_manager import get_language_instruction, get_prompt, resolve_language from .models import WorkflowSession @@ -27,6 +30,73 @@ _VALID_URL_SCHEMES = ("http://", "https://") +async def _emit_system_prompt_ready(system_prompt: str, session: Any) -> str: + """Emit ON_SYSTEM_PROMPT_READY to registered extensions and return result. + + Issue #3405: Fires after _get_system_prompt() so extensions can inspect or + rewrite the system prompt before it enters prompt assembly. If no extension + is registered for this hook the function is a no-op and the original prompt + is returned unchanged. + + Args: + system_prompt: The assembled system prompt string. + session: WorkflowSession instance (passed as data["session"]). + + Returns: + Possibly modified system prompt string. 
+ """ + ctx = HookContext( + session_id=getattr(session, "session_id", ""), + data={"system_prompt": system_prompt, "session": session}, + ) + result = await get_extension_manager().invoke_with_transform( + HookPoint.SYSTEM_PROMPT_READY, ctx, "system_prompt" + ) + if isinstance(result, str) and result != system_prompt: + logger.debug( + "[#3405] SYSTEM_PROMPT_READY modified system prompt (%d -> %d chars)", + len(system_prompt), + len(result), + ) + return result + return system_prompt + + +async def _emit_full_prompt_ready( + prompt: str, llm_params: Dict[str, Any], context: Dict[str, Any] +) -> str: + """Emit ON_FULL_PROMPT_READY to registered extensions and return result. + + Issue #3405: Fires after _build_full_prompt() so extensions can append + dynamic content (e.g. infrastructure telemetry hints) before the prompt + is sent to the LLM. If no extension is registered for this hook the + function is a no-op and the original prompt is returned unchanged. + + Args: + prompt: The fully assembled prompt string. + llm_params: Dict containing model/endpoint selection. + context: Arbitrary request-level context dict. + + Returns: + Possibly modified full prompt string. 
+ """ + ctx = HookContext( + session_id=context.get("session_id", ""), + data={"prompt": prompt, "llm_params": llm_params, "context": context}, + ) + result = await get_extension_manager().invoke_with_transform( + HookPoint.FULL_PROMPT_READY, ctx, "prompt" + ) + if isinstance(result, str) and result != prompt: + logger.debug( + "[#3405] FULL_PROMPT_READY modified full prompt (%d -> %d chars)", + len(prompt), + len(result), + ) + return result + return prompt + + class LLMHandlerMixin: """Mixin for LLM interaction handling.""" @@ -310,6 +380,7 @@ async def _prepare_llm_request_params( else: ollama_endpoint = self._get_ollama_endpoint_for_model(selected_model) system_prompt = self._get_system_prompt(language=language) + system_prompt = await _emit_system_prompt_ready(system_prompt, session) conversation_context = self._build_conversation_context(session) # Knowledge retrieval for RAG @@ -324,6 +395,11 @@ async def _prepare_llm_request_params( full_prompt = self._build_full_prompt( system_prompt, knowledge_context, conversation_context, message ) + full_prompt = await _emit_full_prompt_ready( + full_prompt, + {"endpoint": ollama_endpoint, "model": selected_model}, + {"session_id": session.session_id, "message": message}, + ) logger.info( "[ChatWorkflowManager] Making Ollama request to: %s", ollama_endpoint diff --git a/autobot-backend/chat_workflow/prompt_hooks_test.py b/autobot-backend/chat_workflow/prompt_hooks_test.py new file mode 100644 index 000000000..f745dd2c2 --- /dev/null +++ b/autobot-backend/chat_workflow/prompt_hooks_test.py @@ -0,0 +1,195 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for Issue #3405 — ON_SYSTEM_PROMPT_READY and ON_FULL_PROMPT_READY +plugin hooks in the chat pipeline. + +Tests verify: +1. New HookPoint enum members exist +2. ON_SYSTEM_PROMPT_READY fires with correct args and return value replaces prompt +3. 
ON_FULL_PROMPT_READY fires with correct args and return value replaces prompt +4. No-op when no extensions are registered for a hook +5. Extension errors do not crash the pipeline +""" + +from typing import Optional +from unittest.mock import AsyncMock, patch + +import pytest + +from extensions.base import Extension, HookContext +from extensions.hooks import HookPoint +from extensions.manager import ExtensionManager, reset_extension_manager +from chat_workflow.llm_handler import _emit_system_prompt_ready, _emit_full_prompt_ready + + +class _SystemPromptWatcher(Extension): + """Extension that records args and returns a modified system prompt.""" + + name = "test_system_prompt_watcher" + + def __init__(self, return_value: Optional[str] = None) -> None: + self._return_value = return_value + self.captured_system_prompt: Optional[str] = None + + async def on_system_prompt_ready(self, ctx: HookContext) -> Optional[str]: + self.captured_system_prompt = ctx.get("system_prompt") + return self._return_value + + +class _FullPromptWatcher(Extension): + """Extension that records args and returns a modified full prompt.""" + + name = "test_full_prompt_watcher" + + def __init__(self, return_value: Optional[str] = None) -> None: + self._return_value = return_value + self.captured_prompt: Optional[str] = None + self.captured_llm_params: Optional[dict] = None + self.captured_context: Optional[dict] = None + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + self.captured_prompt = ctx.get("prompt") + self.captured_llm_params = ctx.get("llm_params") + self.captured_context = ctx.get("context") + return self._return_value + + +class _ErrorExtension(Extension): + """Extension that always raises an exception.""" + + name = "test_error_extension" + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + raise RuntimeError("simulated extension failure") + + +class _FakeSession: + session_id = "sess-test-001" + metadata: dict = {} + + 
+@pytest.fixture(autouse=True) +def reset_manager(): + """Ensure global ExtensionManager is reset between tests.""" + reset_extension_manager() + yield + reset_extension_manager() + + +class TestNewHookPoints: + """Verify the new HookPoint members are present.""" + + def test_on_system_prompt_ready_exists(self): + assert HookPoint.SYSTEM_PROMPT_READY is not None + + def test_on_full_prompt_ready_exists(self): + assert HookPoint.FULL_PROMPT_READY is not None + + def test_total_hook_count_increased(self): + # Original 22 hooks + 2 new ones = 24 + assert len(HookPoint) == 24 + + +class TestEmitSystemPromptReady: + """Tests for _emit_system_prompt_ready helper.""" + + @pytest.mark.asyncio + async def test_noop_when_no_extension_registered(self): + """Returns original prompt unchanged when no extension is registered.""" + original = "You are AutoBot." + result = await _emit_system_prompt_ready(original, _FakeSession()) + assert result == original + + @pytest.mark.asyncio + async def test_extension_receives_correct_args(self): + """Extension receives system_prompt and session via HookContext.""" + watcher = _SystemPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "You are AutoBot." + session = _FakeSession() + await _emit_system_prompt_ready(original, session) + + assert watcher.captured_system_prompt == original + + @pytest.mark.asyncio + async def test_return_value_replaces_prompt(self): + """A non-None str returned by extension replaces the system prompt.""" + modified = "You are AutoBot [modified by extension]." 
+ watcher = _SystemPromptWatcher(return_value=modified) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + result = await _emit_system_prompt_ready("You are AutoBot.", _FakeSession()) + assert result == modified + + @pytest.mark.asyncio + async def test_none_return_keeps_original(self): + """Returning None from extension keeps the original prompt.""" + watcher = _SystemPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "You are AutoBot." + result = await _emit_system_prompt_ready(original, _FakeSession()) + assert result == original + + +class TestEmitFullPromptReady: + """Tests for _emit_full_prompt_ready helper.""" + + @pytest.mark.asyncio + async def test_noop_when_no_extension_registered(self): + """Returns original prompt unchanged when no extension is registered.""" + original = "System prompt\n\nUser: hello\n\nAssistant:" + result = await _emit_full_prompt_ready(original, {}, {}) + assert result == original + + @pytest.mark.asyncio + async def test_extension_receives_correct_args(self): + """Extension receives prompt, llm_params and context via HookContext.""" + watcher = _FullPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "System prompt\n\nUser: hello\n\nAssistant:" + llm_params = {"model": "llama3", "endpoint": "http://localhost:11434/api/generate"} + context = {"session_id": "sess-abc", "message": "hello"} + + await _emit_full_prompt_ready(original, llm_params, context) + + assert watcher.captured_prompt == original + assert watcher.captured_llm_params == llm_params + assert watcher.captured_context == context + + @pytest.mark.asyncio + async def test_return_value_replaces_prompt(self): + """A non-None str returned by extension replaces the full prompt.""" + modified = "System prompt\n\nUser: 
hello\n\nAssistant:\n\n[hint: be concise]" + watcher = _FullPromptWatcher(return_value=modified) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + result = await _emit_full_prompt_ready( + "System prompt\n\nUser: hello\n\nAssistant:", {}, {} + ) + assert result == modified + + @pytest.mark.asyncio + async def test_extension_error_does_not_crash_pipeline(self): + """An exception inside an extension is swallowed; original prompt is returned.""" + from extensions.manager import get_extension_manager + + get_extension_manager().register(_ErrorExtension()) + + original = "System prompt\n\nUser: hello\n\nAssistant:" + result = await _emit_full_prompt_ready(original, {}, {}) + assert result == original diff --git a/autobot-backend/extensions/base.py b/autobot-backend/extensions/base.py index d363a8c06..639a09409 100644 --- a/autobot-backend/extensions/base.py +++ b/autobot-backend/extensions/base.py @@ -488,6 +488,40 @@ async def on_approval_received(self, ctx: HookContext) -> Optional[None]: None (logging only) """ + # ========== Prompt Pipeline Hooks (Issue #3405) ========== + + async def on_system_prompt_ready(self, ctx: HookContext) -> Optional[str]: + """ + Called after the system prompt is built. + + Receives the assembled system prompt in ctx.data["system_prompt"] and + ctx.data["session"] for session metadata. Return a non-None str to + replace the system prompt; return None to leave it unchanged. + + Args: + ctx: Hook context with data["system_prompt"] and data["session"] + + Returns: + Modified system prompt str or None to keep unchanged + """ + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + """ + Called after the full prompt (system + knowledge + conversation) is built. + + Receives the full prompt in ctx.data["prompt"], LLM parameters in + ctx.data["llm_params"], and request context in ctx.data["context"]. 
+ Return a non-None str to replace the full prompt; return None to keep it + unchanged. + + Args: + ctx: Hook context with data["prompt"], data["llm_params"], + data["context"] + + Returns: + Modified full prompt str or None to keep unchanged + """ + # ========== Utility Methods ========== def __repr__(self) -> str: diff --git a/autobot-backend/extensions/extension_hooks_test.py b/autobot-backend/extensions/extension_hooks_test.py index 70d40528c..40c1159ce 100644 --- a/autobot-backend/extensions/extension_hooks_test.py +++ b/autobot-backend/extensions/extension_hooks_test.py @@ -30,8 +30,8 @@ class TestHookPoint: """Test HookPoint enum definitions.""" def test_hook_count(self): - """Should have exactly 22 hook points.""" - assert len(HookPoint) == 22 + """Should have exactly 24 hook points (22 original + 2 added in #3405).""" + assert len(HookPoint) == 24 def test_message_preparation_hooks(self): """Should have message preparation hooks.""" @@ -82,6 +82,11 @@ def test_approval_hooks(self): assert HookPoint.APPROVAL_REQUIRED is not None assert HookPoint.APPROVAL_RECEIVED is not None + def test_prompt_pipeline_hooks(self): + """Should have prompt pipeline hooks added in Issue #3405.""" + assert HookPoint.SYSTEM_PROMPT_READY is not None + assert HookPoint.FULL_PROMPT_READY is not None + def test_hook_metadata_exists(self): """Every hook should have metadata.""" for hook in HookPoint: diff --git a/autobot-backend/extensions/hooks.py b/autobot-backend/extensions/hooks.py index 348095d1c..696a15f9d 100644 --- a/autobot-backend/extensions/hooks.py +++ b/autobot-backend/extensions/hooks.py @@ -76,6 +76,10 @@ class HookPoint(Enum): APPROVAL_REQUIRED = auto() # Method: on_approval_required APPROVAL_RECEIVED = auto() # Method: on_approval_received + # Prompt pipeline — Issue #3405 + SYSTEM_PROMPT_READY = auto() # Method: on_system_prompt_ready + FULL_PROMPT_READY = auto() # Method: on_full_prompt_ready + # Hook metadata for documentation and validation HOOK_METADATA = { @@ 
-189,6 +193,16 @@ class HookPoint(Enum): "can_modify": [], "return_type": "None (logging only)", }, + HookPoint.SYSTEM_PROMPT_READY: { + "description": "Called after the system prompt is built; return a str to replace it", + "can_modify": ["system_prompt"], + "return_type": "Modified system prompt str or None", + }, + HookPoint.FULL_PROMPT_READY: { + "description": "Called after the full prompt is assembled; return a str to replace it", + "can_modify": ["prompt"], + "return_type": "Modified full prompt str or None", + }, } diff --git a/docs/developer/PROMPT_MIDDLEWARE_GUIDE.md b/docs/developer/PROMPT_MIDDLEWARE_GUIDE.md new file mode 100644 index 000000000..708c0677e --- /dev/null +++ b/docs/developer/PROMPT_MIDDLEWARE_GUIDE.md @@ -0,0 +1,192 @@ +# Prompt Middleware Guide + +> Issue #3405 — `ON_SYSTEM_PROMPT_READY` and `ON_FULL_PROMPT_READY` plugin hooks + +This guide explains how to intercept and modify LLM prompts at two points in +the chat pipeline using the extension hook system. + +--- + +## Overview + +AutoBot's chat pipeline assembles prompts in two stages: + +1. **System prompt** — personality preamble + base system prompt + language + instruction. Built inside `LLMHandlerMixin._get_system_prompt()`. +2. **Full prompt** — system prompt + knowledge context (RAG) + conversation + history + current user message. Built inside + `LLMHandlerMixin._build_full_prompt()`. + +After each stage the pipeline fires a hook that lets registered extensions +inspect and optionally rewrite the output before it reaches the LLM. + +--- + +## Hook signatures + +### `HookPoint.SYSTEM_PROMPT_READY` + +Fired after `_get_system_prompt()` returns. + +| Context key | Type | Description | +|---|---|---| +| `system_prompt` | `str` | Assembled system prompt | +| `session` | `WorkflowSession` | Current session instance | + +**Return value:** Return a `str` to replace the system prompt. Return `None` +to leave it unchanged.
The pipeline uses the last non-`None` value from all +registered extensions (pipeline chaining via `invoke_with_transform`). + +### `HookPoint.FULL_PROMPT_READY` + +Fired after `_build_full_prompt()` returns. + +| Context key | Type | Description | +|---|---|---| +| `prompt` | `str` | Fully assembled prompt | +| `llm_params` | `dict` | `{"model": ..., "endpoint": ...}` | +| `context` | `dict` | `{"session_id": ..., "message": ...}` | + +**Return value:** Return a `str` to replace the full prompt. Return `None` +to leave it unchanged. + +--- + +## Graceful degradation + +Both hooks are no-ops when no extension is registered. A failing extension is +logged at `ERROR` level but does **not** propagate an exception — the original +prompt is returned unchanged. This guarantees the chat pipeline always +completes even if middleware is misconfigured. + +--- + +## Writing an extension + +Extensions subclass `Extension` from `extensions/base.py`. Override the hook method +named after the `HookPoint` member: lower-case the member name and prefix it with `on_`.
+ +Applying that rule (`on_` + lower-cased member name), the method names are: + +- `on_system_prompt_ready` — for `HookPoint.SYSTEM_PROMPT_READY` +- `on_full_prompt_ready` — for `HookPoint.FULL_PROMPT_READY` + +```python +from extensions.base import Extension, HookContext +from typing import Optional + + +class MyPromptExtension(Extension): + name = "my_prompt_extension" + priority = 150 # lower = runs first + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + prompt = ctx.get("prompt", "") + # append a custom hint + return prompt + "\n\n[Always respond in bullet points.]" +``` + +Register the extension on application startup: + +```python +from extensions.manager import get_extension_manager +from my_module import MyPromptExtension + +get_extension_manager().register(MyPromptExtension()) +``` + +--- + +## Worked example: Telemetry Prompt Middleware + +`plugins/core-plugins/telemetry-prompt-middleware/plugin.py` ships as a +reference implementation. It queries Prometheus for the current cluster-wide +CPU usage. When usage exceeds a configurable threshold the plugin appends a +one-sentence hint asking the model to keep its response concise, reducing +time-to-first-token on a loaded host. + +### How it works + +``` +LLM pipeline + | + +-- _build_full_prompt() + | + +-- ON_FULL_PROMPT_READY fires + | + +-- TelemetryPromptMiddleware.on_full_prompt_ready() + | + +-- GET /api/v1/query (Prometheus, 2 s timeout) + | + +-- CPU > threshold? + yes -> return prompt + hint + no -> return None (no change) +``` + +### Configuration + +| Variable | Default | Description | +|---|---|---| +| `PROMETHEUS_URL` (env) | `""` | Prometheus base URL | +| `TELEMETRY_CPU_THRESHOLD` (env) | `80` | CPU % threshold | +| `cpu_threshold_pct` (plugin config) | `80` | Same, via plugin config API | +| `prometheus_url` (plugin config) | `""` | Same, overrides env var | + +If `PROMETHEUS_URL` is not set the plugin silently skips hint injection. 
+ +### Registering the plugin + +```python +import os +from extensions.manager import get_extension_manager +from plugins.core_plugins.telemetry_prompt_middleware.plugin import ( + TelemetryPromptMiddleware, +) + +plugin = TelemetryPromptMiddleware( + config={ + "prometheus_url": os.getenv("PROMETHEUS_URL", ""), + "cpu_threshold_pct": 80, + } +) +get_extension_manager().register(plugin) +``` + +Or load it via the Plugin Manager API (POST `/plugins/telemetry-prompt-middleware/load`). + +--- + +## Testing + +Unit tests for the hooks live in: + +``` +autobot-backend/chat_workflow/prompt_hooks_test.py +``` + +Run them with: + +```bash +cd autobot-backend +pytest chat_workflow/prompt_hooks_test.py -v +``` + +The test suite covers: + +- Both `HookPoint` members exist. +- `ON_SYSTEM_PROMPT_READY` fires with the correct `system_prompt` arg. +- `ON_FULL_PROMPT_READY` fires with `prompt`, `llm_params`, and `context`. +- A returned `str` replaces the prompt. +- `None` return keeps the original prompt unchanged. +- An extension that raises an exception does not crash the pipeline. 
+ +--- + +## Related + +- `extensions/hooks.py` — `HookPoint` enum +- `extensions/base.py` — `Extension` base class with hook methods +- `extensions/manager.py` — `ExtensionManager` and `invoke_with_transform` +- `chat_workflow/llm_handler.py` — hook call sites +- `docs/developer/PLUGIN_SDK.md` — Plugin SDK lifecycle docs +- Issue #3405 diff --git a/plugins/core-plugins/telemetry-prompt-middleware/__init__.py b/plugins/core-plugins/telemetry-prompt-middleware/__init__.py new file mode 100644 index 000000000..bf25d5fd0 --- /dev/null +++ b/plugins/core-plugins/telemetry-prompt-middleware/__init__.py @@ -0,0 +1,3 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss diff --git a/plugins/core-plugins/telemetry-prompt-middleware/plugin.json b/plugins/core-plugins/telemetry-prompt-middleware/plugin.json new file mode 100644 index 000000000..99c7871c1 --- /dev/null +++ b/plugins/core-plugins/telemetry-prompt-middleware/plugin.json @@ -0,0 +1,25 @@ +{ + "name": "telemetry-prompt-middleware", + "version": "1.0.0", + "display_name": "Telemetry Prompt Middleware", + "description": "Appends a concise-response hint to the LLM prompt when CPU load is above a configurable threshold", + "author": "mrveiss", + "entry_point": "plugins.core_plugins.telemetry_prompt_middleware.plugin", + "dependencies": ["aiohttp"], + "config_schema": { + "type": "object", + "properties": { + "cpu_threshold_pct": { + "type": "number", + "default": 80, + "description": "CPU usage percentage above which the hint is injected" + }, + "prometheus_url": { + "type": "string", + "default": "", + "description": "Prometheus base URL (overrides PROMETHEUS_URL env var)" + } + } + }, + "hooks": ["on_full_prompt_ready"] +} diff --git a/plugins/core-plugins/telemetry-prompt-middleware/plugin.py b/plugins/core-plugins/telemetry-prompt-middleware/plugin.py new file mode 100644 index 000000000..34a46b9e2 --- /dev/null +++ 
b/plugins/core-plugins/telemetry-prompt-middleware/plugin.py @@ -0,0 +1,102 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Telemetry Prompt Middleware Plugin — Issue #3405. + +Registers ON_FULL_PROMPT_READY to inspect live CPU load from Prometheus. +When average CPU usage across all nodes exceeds the configured threshold, +a one-sentence hint is appended to the prompt asking the model to keep its +response concise to reduce wall-clock processing time on a loaded host. + +Configuration (via plugin config or environment variables): + cpu_threshold_pct — float, default 80. Percent CPU above which the + hint fires. Set to 0 to always inject; 100 to + effectively disable. + prometheus_url — str, overrides the PROMETHEUS_URL env var when set. + +Environment variables: + PROMETHEUS_URL — Base URL of the Prometheus instance, e.g. + "http://192.168.1.10:9090". + TELEMETRY_CPU_THRESHOLD — Override threshold without touching plugin config. +""" + +import logging +import os +from typing import Dict, Optional + +import aiohttp + +from extensions.base import Extension, HookContext + +logger = logging.getLogger(__name__) + +_CPU_PROMQL = ( + "100 - (avg(rate(node_cpu_seconds_total{mode='idle'}[2m])) * 100)" +) +_HIGH_LOAD_HINT = ( + "[System note: host CPU is currently under high load. 
" + "Please keep your response concise to minimise processing time.]" +) +_QUERY_TIMEOUT = aiohttp.ClientTimeout(total=2.0) + + +class TelemetryPromptMiddleware(Extension): + """Prompt middleware that injects a load-aware hint via ON_FULL_PROMPT_READY.""" + + name = "telemetry_prompt_middleware" + priority = 200 + + def __init__(self, config: Optional[Dict] = None) -> None: + self._config = config or {} + self._threshold = float( + os.getenv( + "TELEMETRY_CPU_THRESHOLD", + self._config.get("cpu_threshold_pct", 80), + ) + ) + self._prometheus_url = ( + self._config.get("prometheus_url") + or os.getenv("PROMETHEUS_URL", "") + ).rstrip("/") + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + """Append a concise-response hint when CPU load exceeds threshold.""" + prompt = ctx.get("prompt", "") + if not prompt: + return None + + cpu_pct = await self._fetch_cpu_percent() + if cpu_pct is None: + logger.debug("[#3405] Telemetry plugin: Prometheus unavailable, skipping") + return None + + logger.debug("[#3405] Telemetry plugin: current CPU %.1f%% (threshold %.1f%%)", cpu_pct, self._threshold) + if cpu_pct < self._threshold: + return None + + logger.info( + "[#3405] Telemetry plugin: CPU %.1f%% > threshold %.1f%% — injecting hint", + cpu_pct, + self._threshold, + ) + return f"{prompt}\n\n{_HIGH_LOAD_HINT}" + + async def _fetch_cpu_percent(self) -> Optional[float]: + """Query Prometheus for the current cluster-wide CPU usage percentage.""" + if not self._prometheus_url: + return None + url = f"{self._prometheus_url}/api/v1/query" + try: + async with aiohttp.ClientSession(timeout=_QUERY_TIMEOUT) as session: + async with session.get(url, params={"query": _CPU_PROMQL}) as resp: + if resp.status != 200: + return None + payload = await resp.json() + results = payload.get("data", {}).get("result", []) + if not results: + return None + return float(results[0]["value"][1]) + except Exception as exc: + logger.debug("[#3405] Telemetry plugin: Prometheus query 
failed: %s", exc) + return None