diff --git a/autobot-backend/chat_workflow/llm_handler.py b/autobot-backend/chat_workflow/llm_handler.py index 0739e2a7f..7287029a8 100644 --- a/autobot-backend/chat_workflow/llm_handler.py +++ b/autobot-backend/chat_workflow/llm_handler.py @@ -17,6 +17,9 @@ from autobot_shared.http_client import get_http_client from constants.model_constants import ModelConstants from dependencies import global_config_manager +from extensions.base import HookContext +from extensions.hooks import HookPoint +from extensions.manager import get_extension_manager from prompt_manager import get_language_instruction, get_prompt, resolve_language from .models import WorkflowSession @@ -27,6 +30,73 @@ _VALID_URL_SCHEMES = ("http://", "https://") +async def _emit_system_prompt_ready(system_prompt: str, session: Any) -> str: + """Emit ON_SYSTEM_PROMPT_READY to registered extensions and return result. + + Issue #3405: Fires after _get_system_prompt() so extensions can inspect or + rewrite the system prompt before it enters prompt assembly. If no extension + is registered for this hook the function is a no-op and the original prompt + is returned unchanged. + + Args: + system_prompt: The assembled system prompt string. + session: WorkflowSession instance (passed as data["session"]). + + Returns: + Possibly modified system prompt string. 
+ """ + ctx = HookContext( + session_id=getattr(session, "session_id", ""), + data={"system_prompt": system_prompt, "session": session}, + ) + result = await get_extension_manager().invoke_with_transform( + HookPoint.SYSTEM_PROMPT_READY, ctx, "system_prompt" + ) + if isinstance(result, str) and result != system_prompt: + logger.debug( + "[#3405] SYSTEM_PROMPT_READY modified system prompt (%d -> %d chars)", + len(system_prompt), + len(result), + ) + return result + return system_prompt + + +async def _emit_full_prompt_ready( + prompt: str, llm_params: Dict[str, Any], context: Dict[str, Any] +) -> str: + """Emit ON_FULL_PROMPT_READY to registered extensions and return result. + + Issue #3405: Fires after _build_full_prompt() so extensions can append + dynamic content (e.g. infrastructure telemetry hints) before the prompt + is sent to the LLM. If no extension is registered for this hook the + function is a no-op and the original prompt is returned unchanged. + + Args: + prompt: The fully assembled prompt string. + llm_params: Dict containing model/endpoint selection. + context: Arbitrary request-level context dict. + + Returns: + Possibly modified full prompt string. 
+ """ + ctx = HookContext( + session_id=context.get("session_id", ""), + data={"prompt": prompt, "llm_params": llm_params, "context": context}, + ) + result = await get_extension_manager().invoke_with_transform( + HookPoint.FULL_PROMPT_READY, ctx, "prompt" + ) + if isinstance(result, str) and result != prompt: + logger.debug( + "[#3405] FULL_PROMPT_READY modified full prompt (%d -> %d chars)", + len(prompt), + len(result), + ) + return result + return prompt + + class LLMHandlerMixin: """Mixin for LLM interaction handling.""" @@ -310,6 +380,7 @@ async def _prepare_llm_request_params( else: ollama_endpoint = self._get_ollama_endpoint_for_model(selected_model) system_prompt = self._get_system_prompt(language=language) + system_prompt = await _emit_system_prompt_ready(system_prompt, session) conversation_context = self._build_conversation_context(session) # Knowledge retrieval for RAG @@ -324,6 +395,11 @@ async def _prepare_llm_request_params( full_prompt = self._build_full_prompt( system_prompt, knowledge_context, conversation_context, message ) + full_prompt = await _emit_full_prompt_ready( + full_prompt, + {"endpoint": ollama_endpoint, "model": selected_model}, + {"session_id": session.session_id, "message": message}, + ) logger.info( "[ChatWorkflowManager] Making Ollama request to: %s", ollama_endpoint diff --git a/autobot-backend/chat_workflow/prompt_hooks_test.py b/autobot-backend/chat_workflow/prompt_hooks_test.py new file mode 100644 index 000000000..f745dd2c2 --- /dev/null +++ b/autobot-backend/chat_workflow/prompt_hooks_test.py @@ -0,0 +1,195 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Unit tests for Issue #3405 — ON_SYSTEM_PROMPT_READY and ON_FULL_PROMPT_READY +plugin hooks in the chat pipeline. + +Tests verify: +1. New HookPoint enum members exist +2. ON_SYSTEM_PROMPT_READY fires with correct args and return value replaces prompt +3. 
ON_FULL_PROMPT_READY fires with correct args and return value replaces prompt +4. No-op when no extensions are registered for a hook +5. Extension errors do not crash the pipeline +""" + +from typing import Optional +from unittest.mock import AsyncMock, patch + +import pytest + +from extensions.base import Extension, HookContext +from extensions.hooks import HookPoint +from extensions.manager import ExtensionManager, reset_extension_manager +from chat_workflow.llm_handler import _emit_system_prompt_ready, _emit_full_prompt_ready + + +class _SystemPromptWatcher(Extension): + """Extension that records args and returns a modified system prompt.""" + + name = "test_system_prompt_watcher" + + def __init__(self, return_value: Optional[str] = None) -> None: + self._return_value = return_value + self.captured_system_prompt: Optional[str] = None + + async def on_system_prompt_ready(self, ctx: HookContext) -> Optional[str]: + self.captured_system_prompt = ctx.get("system_prompt") + return self._return_value + + +class _FullPromptWatcher(Extension): + """Extension that records args and returns a modified full prompt.""" + + name = "test_full_prompt_watcher" + + def __init__(self, return_value: Optional[str] = None) -> None: + self._return_value = return_value + self.captured_prompt: Optional[str] = None + self.captured_llm_params: Optional[dict] = None + self.captured_context: Optional[dict] = None + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + self.captured_prompt = ctx.get("prompt") + self.captured_llm_params = ctx.get("llm_params") + self.captured_context = ctx.get("context") + return self._return_value + + +class _ErrorExtension(Extension): + """Extension that always raises an exception.""" + + name = "test_error_extension" + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + raise RuntimeError("simulated extension failure") + + +class _FakeSession: + session_id = "sess-test-001" + metadata: dict = {} + + 
+@pytest.fixture(autouse=True) +def reset_manager(): + """Ensure global ExtensionManager is reset between tests.""" + reset_extension_manager() + yield + reset_extension_manager() + + +class TestNewHookPoints: + """Verify the new HookPoint members are present.""" + + def test_on_system_prompt_ready_exists(self): + assert HookPoint.SYSTEM_PROMPT_READY is not None + + def test_on_full_prompt_ready_exists(self): + assert HookPoint.FULL_PROMPT_READY is not None + + def test_total_hook_count_increased(self): + # Original 22 hooks + 2 new ones = 24 + assert len(HookPoint) == 24 + + +class TestEmitSystemPromptReady: + """Tests for _emit_system_prompt_ready helper.""" + + @pytest.mark.asyncio + async def test_noop_when_no_extension_registered(self): + """Returns original prompt unchanged when no extension is registered.""" + original = "You are AutoBot." + result = await _emit_system_prompt_ready(original, _FakeSession()) + assert result == original + + @pytest.mark.asyncio + async def test_extension_receives_correct_args(self): + """Extension receives system_prompt and session via HookContext.""" + watcher = _SystemPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "You are AutoBot." + session = _FakeSession() + await _emit_system_prompt_ready(original, session) + + assert watcher.captured_system_prompt == original + + @pytest.mark.asyncio + async def test_return_value_replaces_prompt(self): + """A non-None str returned by extension replaces the system prompt.""" + modified = "You are AutoBot [modified by extension]." 
+ watcher = _SystemPromptWatcher(return_value=modified) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + result = await _emit_system_prompt_ready("You are AutoBot.", _FakeSession()) + assert result == modified + + @pytest.mark.asyncio + async def test_none_return_keeps_original(self): + """Returning None from extension keeps the original prompt.""" + watcher = _SystemPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "You are AutoBot." + result = await _emit_system_prompt_ready(original, _FakeSession()) + assert result == original + + +class TestEmitFullPromptReady: + """Tests for _emit_full_prompt_ready helper.""" + + @pytest.mark.asyncio + async def test_noop_when_no_extension_registered(self): + """Returns original prompt unchanged when no extension is registered.""" + original = "System prompt\n\nUser: hello\n\nAssistant:" + result = await _emit_full_prompt_ready(original, {}, {}) + assert result == original + + @pytest.mark.asyncio + async def test_extension_receives_correct_args(self): + """Extension receives prompt, llm_params and context via HookContext.""" + watcher = _FullPromptWatcher(return_value=None) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + original = "System prompt\n\nUser: hello\n\nAssistant:" + llm_params = {"model": "llama3", "endpoint": "http://localhost:11434/api/generate"} + context = {"session_id": "sess-abc", "message": "hello"} + + await _emit_full_prompt_ready(original, llm_params, context) + + assert watcher.captured_prompt == original + assert watcher.captured_llm_params == llm_params + assert watcher.captured_context == context + + @pytest.mark.asyncio + async def test_return_value_replaces_prompt(self): + """A non-None str returned by extension replaces the full prompt.""" + modified = "System prompt\n\nUser: 
hello\n\nAssistant:\n\n[hint: be concise]" + watcher = _FullPromptWatcher(return_value=modified) + from extensions.manager import get_extension_manager + + get_extension_manager().register(watcher) + + result = await _emit_full_prompt_ready( + "System prompt\n\nUser: hello\n\nAssistant:", {}, {} + ) + assert result == modified + + @pytest.mark.asyncio + async def test_extension_error_does_not_crash_pipeline(self): + """An exception inside an extension is swallowed; original prompt is returned.""" + from extensions.manager import get_extension_manager + + get_extension_manager().register(_ErrorExtension()) + + original = "System prompt\n\nUser: hello\n\nAssistant:" + result = await _emit_full_prompt_ready(original, {}, {}) + assert result == original diff --git a/autobot-backend/extensions/base.py b/autobot-backend/extensions/base.py index d363a8c06..639a09409 100644 --- a/autobot-backend/extensions/base.py +++ b/autobot-backend/extensions/base.py @@ -488,6 +488,40 @@ async def on_approval_received(self, ctx: HookContext) -> Optional[None]: None (logging only) """ + # ========== Prompt Pipeline Hooks (Issue #3405) ========== + + async def on_system_prompt_ready(self, ctx: HookContext) -> Optional[str]: + """ + Called after the system prompt is built. + + Receives the assembled system prompt in ctx.data["system_prompt"] and + ctx.data["session"] for session metadata. Return a non-None str to + replace the system prompt; return None to leave it unchanged. + + Args: + ctx: Hook context with data["system_prompt"] and data["session"] + + Returns: + Modified system prompt str or None to keep unchanged + """ + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + """ + Called after the full prompt (system + knowledge + conversation) is built. + + Receives the full prompt in ctx.data["prompt"], LLM parameters in + ctx.data["llm_params"], and request context in ctx.data["context"]. 
+ Return a non-None str to replace the full prompt; return None to keep it + unchanged. + + Args: + ctx: Hook context with data["prompt"], data["llm_params"], + data["context"] + + Returns: + Modified full prompt str or None to keep unchanged + """ + # ========== Utility Methods ========== def __repr__(self) -> str: diff --git a/autobot-backend/extensions/extension_hooks_test.py b/autobot-backend/extensions/extension_hooks_test.py index 70d40528c..40c1159ce 100644 --- a/autobot-backend/extensions/extension_hooks_test.py +++ b/autobot-backend/extensions/extension_hooks_test.py @@ -30,8 +30,8 @@ class TestHookPoint: """Test HookPoint enum definitions.""" def test_hook_count(self): - """Should have exactly 22 hook points.""" - assert len(HookPoint) == 22 + """Should have exactly 24 hook points (22 original + 2 added in #3405).""" + assert len(HookPoint) == 24 def test_message_preparation_hooks(self): """Should have message preparation hooks.""" @@ -82,6 +82,11 @@ def test_approval_hooks(self): assert HookPoint.APPROVAL_REQUIRED is not None assert HookPoint.APPROVAL_RECEIVED is not None + def test_prompt_pipeline_hooks(self): + """Should have prompt pipeline hooks added in Issue #3405.""" + assert HookPoint.SYSTEM_PROMPT_READY is not None + assert HookPoint.FULL_PROMPT_READY is not None + def test_hook_metadata_exists(self): """Every hook should have metadata.""" for hook in HookPoint: diff --git a/autobot-backend/extensions/hooks.py b/autobot-backend/extensions/hooks.py index 348095d1c..696a15f9d 100644 --- a/autobot-backend/extensions/hooks.py +++ b/autobot-backend/extensions/hooks.py @@ -76,6 +76,10 @@ class HookPoint(Enum): APPROVAL_REQUIRED = auto() # Method: on_approval_required APPROVAL_RECEIVED = auto() # Method: on_approval_received + # Prompt pipeline — Issue #3405 + SYSTEM_PROMPT_READY = auto() # Method: on_system_prompt_ready + FULL_PROMPT_READY = auto() # Method: on_full_prompt_ready + # Hook metadata for documentation and validation HOOK_METADATA = { @@ 
-189,6 +193,16 @@ class HookPoint(Enum): "can_modify": [], "return_type": "None (logging only)", }, + HookPoint.SYSTEM_PROMPT_READY: { + "description": "Called after the system prompt is built; return a str to replace it", + "can_modify": ["system_prompt"], + "return_type": "Modified system prompt str or None", + }, + HookPoint.FULL_PROMPT_READY: { + "description": "Called after the full prompt is assembled; return a str to replace it", + "can_modify": ["prompt"], + "return_type": "Modified full prompt str or None", + }, } diff --git a/docs/developer/PROMPT_MIDDLEWARE_GUIDE.md b/docs/developer/PROMPT_MIDDLEWARE_GUIDE.md new file mode 100644 index 000000000..708c0677e --- /dev/null +++ b/docs/developer/PROMPT_MIDDLEWARE_GUIDE.md @@ -0,0 +1,192 @@ +# Prompt Middleware Guide + +> Issue #3405 — `ON_SYSTEM_PROMPT_READY` and `ON_FULL_PROMPT_READY` plugin hooks + +This guide explains how to intercept and modify LLM prompts at two points in +the chat pipeline using the extension hook system. + +--- + +## Overview + +AutoBot's chat pipeline assembles prompts in two stages: + +1. **System prompt** — personality preamble + base system prompt + language + instruction. Built inside `LLMHandlerMixin._get_system_prompt()`. +2. **Full prompt** — system prompt + knowledge context (RAG) + conversation + history + current user message. Built inside + `LLMHandlerMixin._build_full_prompt()`. + +After each stage the pipeline fires a hook that lets registered extensions +inspect and optionally rewrite the output before it reaches the LLM. + +--- + +## Hook signatures + +### `HookPoint.SYSTEM_PROMPT_READY` + +Fired after `_get_system_prompt()` returns. + +| Context key | Type | Description | +|---|---|---| +| `system_prompt` | `str` | Assembled system prompt | +| `session` | `WorkflowSession` | Current session instance | + +**Return value:** Return a `str` to replace the system prompt. Return `None` +to leave it unchanged.
The pipeline uses the last non-`None` value from all +registered extensions (pipeline chaining via `invoke_with_transform`). + +### `HookPoint.FULL_PROMPT_READY` + +Fired after `_build_full_prompt()` returns. + +| Context key | Type | Description | +|---|---|---| +| `prompt` | `str` | Fully assembled prompt | +| `llm_params` | `dict` | `{"model": ..., "endpoint": ...}` | +| `context` | `dict` | `{"session_id": ..., "message": ...}` | + +**Return value:** Return a `str` to replace the full prompt. Return `None` +to leave it unchanged. + +--- + +## Graceful degradation + +Both hooks are no-ops when no extension is registered. A failing extension is +logged at `ERROR` level but does **not** propagate an exception — the original +prompt is returned unchanged. This guarantees the chat pipeline always +completes even if middleware is misconfigured. + +--- + +## Writing an extension + +Extensions subclass `Extension` from `extensions/base.py`. Override the hook method +named after the `HookPoint` member: lower-case the member name and prefix it with `on_`.
+ +Applying that rule (`on_` + lower-cased member name), the method names are: + +- `on_system_prompt_ready` — for `HookPoint.SYSTEM_PROMPT_READY` +- `on_full_prompt_ready` — for `HookPoint.FULL_PROMPT_READY` + +```python +from extensions.base import Extension, HookContext +from typing import Optional + + +class MyPromptExtension(Extension): + name = "my_prompt_extension" + priority = 150 # lower = runs first + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + prompt = ctx.get("prompt", "") + # append a custom hint + return prompt + "\n\n[Always respond in bullet points.]" +``` + +Register the extension on application startup: + +```python +from extensions.manager import get_extension_manager +from my_module import MyPromptExtension + +get_extension_manager().register(MyPromptExtension()) +``` + +--- + +## Worked example: Telemetry Prompt Middleware + +`plugins/core-plugins/telemetry-prompt-middleware/plugin.py` ships as a +reference implementation. It queries Prometheus for the current cluster-wide +CPU usage. When usage exceeds a configurable threshold the plugin appends a +one-sentence hint asking the model to keep its response concise, reducing +time-to-first-token on a loaded host. + +### How it works + +``` +LLM pipeline + | + +-- _build_full_prompt() + | + +-- ON_FULL_PROMPT_READY fires + | + +-- TelemetryPromptMiddleware.on_full_prompt_ready() + | + +-- GET /api/v1/query (Prometheus, 2 s timeout) + | + +-- CPU > threshold? + yes -> return prompt + hint + no -> return None (no change) +``` + +### Configuration + +| Variable | Default | Description | +|---|---|---| +| `PROMETHEUS_URL` (env) | `""` | Prometheus base URL | +| `TELEMETRY_CPU_THRESHOLD` (env) | `80` | CPU % threshold | +| `cpu_threshold_pct` (plugin config) | `80` | Same, via plugin config API | +| `prometheus_url` (plugin config) | `""` | Same, overrides env var | + +If `PROMETHEUS_URL` is not set the plugin silently skips hint injection. 
+ +### Registering the plugin + +```python +import os +from extensions.manager import get_extension_manager +from plugins.core_plugins.telemetry_prompt_middleware.plugin import ( + TelemetryPromptMiddleware, +) + +plugin = TelemetryPromptMiddleware( + config={ + "prometheus_url": os.getenv("PROMETHEUS_URL", ""), + "cpu_threshold_pct": 80, + } +) +get_extension_manager().register(plugin) +``` + +Or load it via the Plugin Manager API (POST `/plugins/telemetry-prompt-middleware/load`). + +--- + +## Testing + +Unit tests for the hooks live in: + +``` +autobot-backend/chat_workflow/prompt_hooks_test.py +``` + +Run them with: + +```bash +cd autobot-backend +pytest chat_workflow/prompt_hooks_test.py -v +``` + +The test suite covers: + +- Both `HookPoint` members exist. +- `ON_SYSTEM_PROMPT_READY` fires with the correct `system_prompt` arg. +- `ON_FULL_PROMPT_READY` fires with `prompt`, `llm_params`, and `context`. +- A returned `str` replaces the prompt. +- `None` return keeps the original prompt unchanged. +- An extension that raises an exception does not crash the pipeline. 
+ +--- + +## Related + +- `extensions/hooks.py` — `HookPoint` enum +- `extensions/base.py` — `Extension` base class with hook methods +- `extensions/manager.py` — `ExtensionManager` and `invoke_with_transform` +- `chat_workflow/llm_handler.py` — hook call sites +- `docs/developer/PLUGIN_SDK.md` — Plugin SDK lifecycle docs +- Issue #3405 diff --git a/plugins/core-plugins/telemetry-prompt-middleware/__init__.py b/plugins/core-plugins/telemetry-prompt-middleware/__init__.py new file mode 100644 index 000000000..bf25d5fd0 --- /dev/null +++ b/plugins/core-plugins/telemetry-prompt-middleware/__init__.py @@ -0,0 +1,3 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss diff --git a/plugins/core-plugins/telemetry-prompt-middleware/plugin.json b/plugins/core-plugins/telemetry-prompt-middleware/plugin.json new file mode 100644 index 000000000..99c7871c1 --- /dev/null +++ b/plugins/core-plugins/telemetry-prompt-middleware/plugin.json @@ -0,0 +1,25 @@ +{ + "name": "telemetry-prompt-middleware", + "version": "1.0.0", + "display_name": "Telemetry Prompt Middleware", + "description": "Appends a concise-response hint to the LLM prompt when CPU load is above a configurable threshold", + "author": "mrveiss", + "entry_point": "plugins.core_plugins.telemetry_prompt_middleware.plugin", + "dependencies": ["aiohttp"], + "config_schema": { + "type": "object", + "properties": { + "cpu_threshold_pct": { + "type": "number", + "default": 80, + "description": "CPU usage percentage above which the hint is injected" + }, + "prometheus_url": { + "type": "string", + "default": "", + "description": "Prometheus base URL (overrides PROMETHEUS_URL env var)" + } + } + }, + "hooks": ["on_full_prompt_ready"] +} diff --git a/plugins/core-plugins/telemetry-prompt-middleware/plugin.py b/plugins/core-plugins/telemetry-prompt-middleware/plugin.py new file mode 100644 index 000000000..34a46b9e2 --- /dev/null +++ 
b/plugins/core-plugins/telemetry-prompt-middleware/plugin.py @@ -0,0 +1,102 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Telemetry Prompt Middleware Plugin — Issue #3405. + +Registers ON_FULL_PROMPT_READY to inspect live CPU load from Prometheus. +When average CPU usage across all nodes exceeds the configured threshold, +a one-sentence hint is appended to the prompt asking the model to keep its +response concise to reduce wall-clock processing time on a loaded host. + +Configuration (via plugin config or environment variables): + cpu_threshold_pct — float, default 80. Percent CPU above which the + hint fires. Set to 0 to always inject; 100 to + effectively disable. + prometheus_url — str, overrides the PROMETHEUS_URL env var when set. + +Environment variables: + PROMETHEUS_URL — Base URL of the Prometheus instance, e.g. + "http://192.168.1.10:9090". + TELEMETRY_CPU_THRESHOLD — Override threshold without touching plugin config. +""" + +import logging +import os +from typing import Dict, Optional + +import aiohttp + +from extensions.base import Extension, HookContext + +logger = logging.getLogger(__name__) + +_CPU_PROMQL = ( + "100 - (avg(rate(node_cpu_seconds_total{mode='idle'}[2m])) * 100)" +) +_HIGH_LOAD_HINT = ( + "[System note: host CPU is currently under high load. 
" + "Please keep your response concise to minimise processing time.]" +) +_QUERY_TIMEOUT = aiohttp.ClientTimeout(total=2.0) + + +class TelemetryPromptMiddleware(Extension): + """Prompt middleware that injects a load-aware hint via ON_FULL_PROMPT_READY.""" + + name = "telemetry_prompt_middleware" + priority = 200 + + def __init__(self, config: Optional[Dict] = None) -> None: + self._config = config or {} + self._threshold = float( + os.getenv( + "TELEMETRY_CPU_THRESHOLD", + self._config.get("cpu_threshold_pct", 80), + ) + ) + self._prometheus_url = ( + self._config.get("prometheus_url") + or os.getenv("PROMETHEUS_URL", "") + ).rstrip("/") + + async def on_full_prompt_ready(self, ctx: HookContext) -> Optional[str]: + """Append a concise-response hint when CPU load exceeds threshold.""" + prompt = ctx.get("prompt", "") + if not prompt: + return None + + cpu_pct = await self._fetch_cpu_percent() + if cpu_pct is None: + logger.debug("[#3405] Telemetry plugin: Prometheus unavailable, skipping") + return None + + logger.debug("[#3405] Telemetry plugin: current CPU %.1f%% (threshold %.1f%%)", cpu_pct, self._threshold) + if cpu_pct < self._threshold: + return None + + logger.info( + "[#3405] Telemetry plugin: CPU %.1f%% > threshold %.1f%% — injecting hint", + cpu_pct, + self._threshold, + ) + return f"{prompt}\n\n{_HIGH_LOAD_HINT}" + + async def _fetch_cpu_percent(self) -> Optional[float]: + """Query Prometheus for the current cluster-wide CPU usage percentage.""" + if not self._prometheus_url: + return None + url = f"{self._prometheus_url}/api/v1/query" + try: + async with aiohttp.ClientSession(timeout=_QUERY_TIMEOUT) as session: + async with session.get(url, params={"query": _CPU_PROMQL}) as resp: + if resp.status != 200: + return None + payload = await resp.json() + results = payload.get("data", {}).get("result", []) + if not results: + return None + return float(results[0]["value"][1]) + except Exception as exc: + logger.debug("[#3405] Telemetry plugin: Prometheus query 
failed: %s", exc) + return None