diff --git a/__init__.py b/__init__.py
index 02bec5c..f535d7f 100644
--- a/__init__.py
+++ b/__init__.py
@@ -324,6 +324,73 @@ def _measure_actual_tokens(
     }
 
 
+def _classify_prompt_content_for_canary(text: str) -> str:
+    """Label runtime system text as ``skill_prompt`` or ``system_prompt``.
+
+    Runtime API payloads usually expose both system and skill instructions as
+    role='system' messages, and the canary may only rewrite clearly skill-like
+    text. Anything ordinary or unclear is labelled ``system_prompt`` and is thus
+    never eligible for the same_type_skill_prompt_only canary class.
+    """
+    lowered = text.lower()
+    # Cue 1: front-matter style opening ("---") with a "name:" key near the top.
+    has_front_matter = lowered.lstrip().startswith("---") and "name:" in lowered[:300]
+    # Cue 2: the leading text literally says "use this skill". The runtime canary
+    # is stricter than the offline analyzer: broader cues such as "available
+    # skills" remain system_prompt at runtime.
+    says_use_skill = "use this skill" in lowered[:500]
+    if has_front_matter or says_use_skill:
+        return "skill_prompt"
+    return "system_prompt"
+
+
+def _apply_prompt_dedup_canary_to_api_messages(
+    api_messages: List[Dict[str, Any]], *, salt: str = "contextpilot-runtime-prompt-dedup-v1"
+):
+    """Apply the default-off skill-prompt canary to runtime API messages.
+
+    Narrow adapter from Hermes/OpenAI-style messages to the analyzer package's
+    _LLMContent carrier. Only string-content role='system' messages are scanned;
+    api_messages is rewritten only for skill_prompt items, and only when the
+    canary (CONTEXTPILOT_PROMPT_DEDUP_MODE=canary) reports a mutation. Returns the
+    result, or None if nothing was scanned or the canary is unavailable or fails.
+    """
+    try:
+        from contextpilot.hermes_opportunities.models import _LLMContent
+        from contextpilot.hermes_opportunities.prompt_dedup_canary import (
+            apply_prompt_dedup_canary,
+        )
+    except Exception as e:  # noqa: BLE001 - canary must never break requests
+        logger.debug("[ContextPilot] prompt dedup canary unavailable: %s", e)
+        return None
+
+    llm_items = []
+    message_indexes = []
+    for idx, msg in enumerate(api_messages):
+        if not isinstance(msg, dict) or msg.get("role") != "system":
+            continue
+        content = msg.get("content")
+        if not isinstance(content, str):
+            continue
+        block_type = _classify_prompt_content_for_canary(content)
+        llm_items.append(_LLMContent(block_type=block_type, content=content))
+        message_indexes.append(idx)
+
+    if not llm_items:
+        return None
+
+    try:
+        result = apply_prompt_dedup_canary(llm_items, salt=salt, min_block_chars=40)
+    except Exception as e:  # noqa: BLE001 - a canary failure must not fail the request
+        logger.debug("[ContextPilot] prompt dedup canary failed: %s", e)
+        return None  # api_messages not written yet, so the payload stays intact
+    if result and result.mutated:
+        for item, idx in zip(llm_items, message_indexes):
+            if item.block_type == "skill_prompt":
+                api_messages[idx]["content"] = item.content
+    return result
+
+
 def _reorder_docs(docs: List[str], alpha: float = 0.001) -> List[str]:
     global _intercept_index
     if len(docs) < 2:
@@ -736,7 +803,16 @@ def _tool_chars(msgs):
             except Exception as e:
                 logger.debug("[ContextPilot] Extract/reorder failed: %s", e)
 
-        # Step 5: Block-level dedup
+        # Step 5: Optional prompt-dedup canary (default off). This is the only
+        # runtime prompt mutation path and is limited to same_type_skill_prompt_only.
+        prompt_dedup_result = _apply_prompt_dedup_canary_to_api_messages(api_messages)
+        prompt_dedup_chars_saved = (
+            prompt_dedup_result.chars_saved
+            if prompt_dedup_result is not None and prompt_dedup_result.mutated
+            else 0
+        )
+
+        # Step 6: Block-level dedup
         sys_content = None
         for msg in api_messages:
             if isinstance(msg, dict) and msg.get("role") == "system":
@@ -749,7 +825,7 @@ def _tool_chars(msgs):
             {"messages": api_messages},
             system_content=sys_content,
         )
-        turn_chars_saved = doc_chars_saved + dedup_result.chars_saved
+        turn_chars_saved = doc_chars_saved + dedup_result.chars_saved + prompt_dedup_chars_saved
         self._total_chars_saved += turn_chars_saved
 
         # Actual before/after of the full LLM-bound payload (chars). These are
@@ -801,6 +877,18 @@ def _tool_chars(msgs):
                 "tokens_saved_method": "estimated_chars_div_4",
                 "doc_chars_saved": doc_chars_saved,
                 "block_chars_saved": dedup_result.chars_saved,
+                "prompt_dedup_mode": (
+                    prompt_dedup_result.mode if prompt_dedup_result is not None else "off"
+                ),
+                "prompt_dedup_class": (
+                    prompt_dedup_result.prompt_dedup_class
+                    if prompt_dedup_result is not None else "same_type_skill_prompt_only"
+                ),
+                "prompt_dedup_blocks_replaced": (
+                    prompt_dedup_result.blocks_replaced
+                    if prompt_dedup_result is not None and prompt_dedup_result.mutated else 0
+                ),
+                "prompt_dedup_chars_saved": prompt_dedup_chars_saved,
                 "blocks_deduped": dedup_result.blocks_deduped,
                 "blocks_total": dedup_result.blocks_total,
                 "docs_deduped": self._total_docs_deduped,
@@ -820,6 +908,14 @@ def _tool_chars(msgs):
             "payload_chars_saved": payload_chars_saved,
             "doc_chars_saved": doc_chars_saved,
             "block_chars_saved": dedup_result.chars_saved,
+            "prompt_dedup_mode": (
+                prompt_dedup_result.mode if prompt_dedup_result is not None else "off"
+            ),
+            "prompt_dedup_chars_saved": prompt_dedup_chars_saved,
+            "prompt_dedup_blocks_replaced": (
+                prompt_dedup_result.blocks_replaced
+                if prompt_dedup_result is not None and prompt_dedup_result.mutated else 0
+            ),
             "blocks_deduped": dedup_result.blocks_deduped,
             "blocks_total": dedup_result.blocks_total,
             "docs_deduped": self._total_docs_deduped,
diff --git a/contextpilot/hermes_opportunities/__init__.py b/contextpilot/hermes_opportunities/__init__.py
index 9b2591c..561c98a 100644
--- a/contextpilot/hermes_opportunities/__init__.py
+++ b/contextpilot/hermes_opportunities/__init__.py
@@ -18,13 +18,17 @@
 * :mod:`.db`          -- read-only Hermes state-DB loaders
 * :mod:`.telemetry`   -- metadata-only ContextPilot telemetry parsing
 * :mod:`.detection`   -- content-aware redundancy detection
+* :mod:`.dedup_ab`    -- offline prompt-dedup A/B simulation (no mutation)
+* :mod:`.prompt_dedup_canary` -- default-off runtime canary, skill-only
 * :mod:`.routing`     -- Worker Context Routing shadow mode (P0)
 * :mod:`.aggregation` -- Parent Aggregation Artifacts shadow mode (P0)
 * :mod:`.report`      -- report assembly + serialization
 * :mod:`.cli`         -- command-line entry point
 
-Everything here is reporting/measurement only: no module ever replaces,
-summarizes, routes, or otherwise mutates context at runtime.
+Safety contract: everything here is reporting/measurement only **except**
+:mod:`.prompt_dedup_canary`, which is the single default-off runtime mutation
+path. That canary is limited to same-type skill-prompt exact duplicates and only
+runs when explicitly enabled by environment variable.
 """
 from __future__ import annotations
 
@@ -92,6 +96,19 @@
     _salt_fingerprint,
     _salted_hash,
 )
+from .prompt_dedup_canary import (
+    CANARY_DEDUP_CLASS,
+    DEFAULT_PROMPT_DEDUP_MODE,
+    PROMPT_DEDUP_CANARY_REFERENCE_TEMPLATE,
+    PROMPT_DEDUP_DISABLE_ENV,
+    PROMPT_DEDUP_MODE_ENV,
+    PROMPT_DEDUP_MODES,
+    SAFETY_DENYLIST,
+    PromptDedupCanaryResult,
+    apply_prompt_dedup_canary,
+    build_canary_telemetry_record,
+    resolve_prompt_dedup_mode,
+)
 from .report import build_report, write_report
 from .routing import (
     ROUTER_LABELS,
@@ -116,6 +133,14 @@
     "ARTIFACT_KINDS",
     "PARENT_AGGREGATION_SOURCE_TYPES",
     "FORBIDDEN_OUTPUT_KEYS",
+    # prompt-dedup canary (runtime; default off)
+    "PROMPT_DEDUP_MODE_ENV",
+    "PROMPT_DEDUP_DISABLE_ENV",
+    "PROMPT_DEDUP_MODES",
+    "DEFAULT_PROMPT_DEDUP_MODE",
+    "CANARY_DEDUP_CLASS",
+    "PROMPT_DEDUP_CANARY_REFERENCE_TEMPLATE",
+    "SAFETY_DENYLIST",
     # dataclasses
     "DuplicateToolOutput",
     "RepeatedBlock",
@@ -155,6 +180,11 @@
     "simulate_prompt_dedup_ab",
     "TokenizerBackend",
     "resolve_tokenizer",
+    # prompt-dedup canary (runtime; default off)
+    "PromptDedupCanaryResult",
+    "resolve_prompt_dedup_mode",
+    "apply_prompt_dedup_canary",
+    "build_canary_telemetry_record",
     # routing (shadow)
     "classify_router_label",
     "analyze_worker_routing_shadow",
diff --git a/contextpilot/hermes_opportunities/prompt_dedup_canary.py b/contextpilot/hermes_opportunities/prompt_dedup_canary.py
new file mode 100644
index 0000000..cee6afa
--- /dev/null
+++ b/contextpilot/hermes_opportunities/prompt_dedup_canary.py
@@ -0,0 +1,340 @@
+"""Default-OFF prompt-dedup canary (the only runtime prompt mutation path).
+
+Everything else in this package is measurement/shadow/simulation only. This
+module is the single, narrowly-scoped place where ContextPilot may *actually*
+replace prompt text bound for the LLM -- and only when an operator has opted in
+via an environment variable, and only for the lowest-risk duplicate class.
+
+Risk gate (all conditions must hold before a single character is changed):
+
+* Mode must be ``canary``. The mode is read from
+  ``CONTEXTPILOT_PROMPT_DEDUP_MODE`` (``off`` | ``shadow`` | ``canary``) and
+  defaults to ``off``. ``off`` and ``shadow`` never mutate the payload.
+* The escape-hatch env ``CONTEXTPILOT_PROMPT_DEDUP_DISABLE`` (any truthy value)
+  forces ``off`` regardless of the mode var -- an immediate kill switch.
+* Only the ``same_type_skill_prompt_only`` class is eligible: an EXACT duplicate
+  block whose every occurrence is inside ``skill_prompt`` content. Duplicates
+  confined to ``system_prompt`` and cross-type ``system_prompt``/``skill_prompt``
+  blocks are NEVER replaced.
+* Only ``skill_prompt`` items are ever rewritten. ``system_prompt``,
+  ``user_prompt``, ``assistant_context`` and ``tool_result`` content is never
+  touched.
+* The first occurrence of each duplicate is kept verbatim; only later exact
+  occurrences are replaced, and only when the deterministic reference string is
+  strictly shorter than the line it replaces (never grows the payload).
+* Any block matching the safety denylist (instruction / safety / security /
+  tool / auth / secret / must / never / always / required / ...) is left
+  unchanged even in canary mode.
+
+The reference string carries only a low-cardinality prompt-type enum and a
+salted block hash -- never raw prompt content. Telemetry is metadata-only:
+mode/class enums and integer counters; no prompt text and no realized-savings
+claim unless an actual mutation occurred.
+"""
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from typing import Iterable
+
+from .models import PROMPT_DUPLICATE_BLOCK_TYPES, _LLMContent
+from .privacy import _assert_no_forbidden_keys, _salted_hash
+
+# Environment controls. ``off`` is the default and the safe state.
+PROMPT_DEDUP_MODE_ENV = "CONTEXTPILOT_PROMPT_DEDUP_MODE"
+PROMPT_DEDUP_DISABLE_ENV = "CONTEXTPILOT_PROMPT_DEDUP_DISABLE"
+PROMPT_DEDUP_MODES = ("off", "shadow", "canary")
+DEFAULT_PROMPT_DEDUP_MODE = "off"
+
+# The only duplicate class this canary will ever act on.
+CANARY_DEDUP_CLASS = "same_type_skill_prompt_only"
+
+# Deterministic placeholder left in place of a later duplicate occurrence. Unlike
+# the A/B simulation template ("... omitted in simulation ..."), this string is
+# really emitted into the payload, so it is labelled as a real ContextPilot
+# replacement. ``<type>`` / ``<hash>`` are low-cardinality only.
+PROMPT_DEDUP_CANARY_REFERENCE_TEMPLATE = (
+    "[ContextPilot dedup: duplicate skill_prompt block omitted; ref=<type>:<hash>]"
+)
+
+# Safety denylist. If any of these case-insensitive substrings appears in a
+# duplicate block, the block is left untouched even in canary mode. The list is
+# deliberately broad (substring-matched, so partly redundant): skipping is always
+# safe; silently rewriting an instruction / safety / auth / secret line never is.
+SAFETY_DENYLIST = (
+    "instruction",
+    "instructions",
+    "safety",
+    "security",
+    "secure",
+    "tool",
+    "auth",
+    "authenticate",
+    "authentication",
+    "authorization",
+    "secret",
+    "credential",
+    "password",
+    "api key",
+    "token",
+    "must",
+    "never",
+    "always",
+    "required",
+    "require",
+    "do not",
+    "don't",
+    "important",
+    "critical",
+    "mandatory",
+    "forbidden",
+    "permission",
+    "sensitive",
+    "confidential",
+    "policy",
+    "verify",
+)
+
+
+@dataclass
+class PromptDedupCanaryResult:
+    """Metadata-only outcome of a canary pass. No raw prompt text, ever.
+
+    ``chars_saved`` / ``blocks_replaced`` are REALIZED figures and are non-zero
+    only when ``mode == 'canary'`` and an actual replacement occurred. The
+    ``candidate_*`` fields are advisory (what a canary *would* replace); they are
+    populated in ``shadow`` and ``canary`` modes and stay 0 when the mode is off.
+    """
+
+    mode: str                      # off | shadow | canary
+    prompt_dedup_class: str        # always CANARY_DEDUP_CLASS
+    mutated: bool                  # True only if a real replacement happened
+    item_count: int                # system/skill prompt items scanned (0 when off)
+    skill_item_count: int          # skill_prompt items in the input (any mode)
+    candidate_block_count: int     # eligible skill-only duplicate groups
+    candidate_chars: int           # advisory chars later occurrences occupy
+    blocks_replaced: int           # REALIZED replacements (canary only)
+    chars_saved: int               # REALIZED chars saved (canary only)
+    denylisted_block_count: int    # skill-only duplicate groups skipped by denylist
+    notes: list[str] = field(default_factory=list)  # human-readable status lines
+
+
+def _truthy(value: str | None) -> bool:  # env-flag parser: unset/blank/0/false/no/off -> False
+    return False if not value else value.strip().lower() not in ("", "0", "false", "no", "off")
+
+
+def resolve_prompt_dedup_mode(env: dict | None = None) -> str:
+    """Return the effective prompt-dedup mode; the safe ``off`` is the default.
+
+    ``env`` is the mapping to read (``os.environ`` when None). A truthy
+    escape-hatch disable variable wins outright; unknown modes fall back to ``off``.
+    """
+    lookup = env if env is not None else os.environ
+    if _truthy(lookup.get(PROMPT_DEDUP_DISABLE_ENV)):
+        return "off"
+    requested = (lookup.get(PROMPT_DEDUP_MODE_ENV) or DEFAULT_PROMPT_DEDUP_MODE).strip().lower()
+    return DEFAULT_PROMPT_DEDUP_MODE if requested not in PROMPT_DEDUP_MODES else requested
+
+
+def _is_denied(block: str) -> bool:
+    """Return True when ``block`` contains any SAFETY_DENYLIST substring, ignoring case."""
+    haystack = block.lower()
+    return any(map(haystack.__contains__, SAFETY_DENYLIST))
+
+
+def _reference_string(canonical_type: str, block_hash: str) -> str:
+    # Fill the <type> slot first, then the <hash> slot, in that order.
+    with_type = PROMPT_DEDUP_CANARY_REFERENCE_TEMPLATE.replace("<type>", canonical_type)
+    return with_type.replace("<hash>", block_hash)
+
+
+def _build_eligibility(
+    contents: list[_LLMContent], *, salt: str, min_block_chars: int
+) -> tuple[dict[str, str], int, int, int]:
+    """Fingerprint system/skill blocks and return the canary-eligible hashes.
+
+    A block is one whitespace-stripped line of at least ``min_block_chars``
+    characters, hashed with ``salt``. Returns ``(eligible, candidate_chars,
+    denylisted, item_count)`` where ``eligible`` maps a block hash to its
+    reference string. A hash is eligible only when it occurs 2+ times, every
+    occurrence is in ``skill_prompt`` content, and the block is not denylisted.
+    """
+    # hash -> {char_length, types: {block_type: occ}, denied}
+    agg: dict[str, dict] = {}
+    item_count = 0
+    for item in contents:
+        bt = item.block_type
+        if bt not in PROMPT_DUPLICATE_BLOCK_TYPES:
+            continue
+        item_count += 1
+        for raw_line in item.content.split("\n"):
+            block = raw_line.strip()
+            if len(block) < min_block_chars:
+                continue
+            h = _salted_hash(block, salt)
+            entry = agg.get(h)
+            if entry is None:
+                agg[h] = {
+                    "char_length": len(block),
+                    "types": {bt: 1},
+                    "denied": _is_denied(block),  # checked once, on first sighting
+                }
+            else:
+                entry["types"][bt] = entry["types"].get(bt, 0) + 1
+
+    eligible: dict[str, str] = {}
+    candidate_chars = 0
+    denylisted = 0
+    for h, entry in agg.items():
+        types = entry["types"]
+        occ = sum(types.values())
+        if occ < 2:
+            continue  # not a duplicate -> nothing to replace
+        # same_type_skill_prompt_only: every occurrence is a skill prompt block.
+        if set(types) != {"skill_prompt"}:
+            continue
+        if entry["denied"]:
+            denylisted += 1
+            continue
+        eligible[h] = _reference_string("skill_prompt", h)
+        # Advisory: chars the later (replaceable) occurrences currently occupy.
+        candidate_chars += (occ - 1) * entry["char_length"]
+    return eligible, candidate_chars, denylisted, item_count
+
+
+def apply_prompt_dedup_canary(
+    contents: Iterable[_LLMContent],
+    *,
+    salt: str,
+    min_block_chars: int,
+    mode: str | None = None,
+    env: dict | None = None,
+) -> PromptDedupCanaryResult:
+    """Run the prompt-dedup canary over LLM-bound content.
+
+    ``contents`` are the in-memory ``_LLMContent`` items bound for the LLM. In
+    ``canary`` mode this MUTATES the ``content`` of eligible ``skill_prompt``
+    items in place (keeping the first occurrence, replacing later exact
+    duplicates with a deterministic reference string). In ``off`` and ``shadow``
+    modes nothing is mutated.
+
+    ``mode`` (used by tests) overrides env resolution, kill switch included;
+    unknown values fall back to ``off``. Otherwise the mode is read from ``env``.
+    """
+    items = list(contents)
+    resolved = mode if mode is not None else resolve_prompt_dedup_mode(env)
+    if resolved not in PROMPT_DEDUP_MODES:
+        resolved = DEFAULT_PROMPT_DEDUP_MODE
+
+    skill_item_count = sum(1 for it in items if it.block_type == "skill_prompt")
+
+    if resolved == "off":
+        # Safe default: no scan, no candidates, no savings.
+        return PromptDedupCanaryResult(
+            mode="off",
+            prompt_dedup_class=CANARY_DEDUP_CLASS,
+            mutated=False,
+            item_count=0,
+            skill_item_count=skill_item_count,
+            candidate_block_count=0,
+            candidate_chars=0,
+            blocks_replaced=0,
+            chars_saved=0,
+            denylisted_block_count=0,
+            notes=["prompt-dedup canary off (default): payload unchanged"],
+        )
+
+    eligible, candidate_chars, denylisted, item_count = _build_eligibility(
+        items, salt=salt, min_block_chars=min_block_chars
+    )
+
+    if resolved == "shadow":
+        # Measure what a canary would replace, but never touch the payload.
+        return PromptDedupCanaryResult(
+            mode="shadow",
+            prompt_dedup_class=CANARY_DEDUP_CLASS,
+            mutated=False,
+            item_count=item_count,
+            skill_item_count=skill_item_count,
+            candidate_block_count=len(eligible),
+            candidate_chars=candidate_chars,
+            blocks_replaced=0,
+            chars_saved=0,
+            denylisted_block_count=denylisted,
+            notes=["prompt-dedup canary shadow: candidates measured, payload unchanged"],
+        )
+
+    # --- canary: the ONLY branch that mutates LLM-bound payload ---------------
+    blocks_replaced = 0
+    chars_saved = 0
+    consumed: set[str] = set()  # hashes whose first (kept) occurrence was seen
+    for item in items:
+        if item.block_type != "skill_prompt":
+            continue  # never touch system/user/assistant/tool content
+        if not eligible:
+            break  # nothing replaceable: skip the per-line scan entirely
+        new_lines: list[str] = []
+        changed = False
+        for raw_line in item.content.split("\n"):
+            block = raw_line.strip()
+            if len(block) < min_block_chars:
+                new_lines.append(raw_line)
+                continue
+            h = _salted_hash(block, salt)
+            ref = eligible.get(h)
+            if ref is None:
+                new_lines.append(raw_line)
+                continue
+            if h not in consumed:
+                consumed.add(h)  # keep the first occurrence verbatim
+                new_lines.append(raw_line)
+                continue
+            # Later exact duplicate: replace only when it actually shrinks the line.
+            if len(ref) < len(raw_line):
+                new_lines.append(ref)  # whole raw line replaced, leading indent included
+                blocks_replaced += 1
+                chars_saved += len(raw_line) - len(ref)
+                changed = True
+            else:
+                new_lines.append(raw_line)
+        if changed:
+            item.content = "\n".join(new_lines)
+
+    notes = ["prompt-dedup canary active: same_type_skill_prompt_only duplicates only"]
+    if denylisted:
+        notes.append(f"{denylisted} skill-only duplicate group(s) skipped by safety denylist")
+    return PromptDedupCanaryResult(
+        mode="canary",
+        prompt_dedup_class=CANARY_DEDUP_CLASS,
+        mutated=blocks_replaced > 0,
+        item_count=item_count,
+        skill_item_count=skill_item_count,
+        candidate_block_count=len(eligible),
+        candidate_chars=candidate_chars,
+        blocks_replaced=blocks_replaced,
+        chars_saved=chars_saved,
+        denylisted_block_count=denylisted,
+        notes=notes,
+    )
+
+
+def build_canary_telemetry_record(result: PromptDedupCanaryResult) -> dict:
+    """Assemble the metadata-only telemetry dict for one canary pass.
+
+    Prompt-dedup savings and replacement counts are reported as realized values
+    ONLY when ``result.mutated`` is set (a real canary mutation); otherwise both
+    are 0, while the ``prompt_dedup_*`` mode/class fields are still reported.
+    Holds only mode/class enums and integer counters -- never prompt text.
+    """
+    saved, replaced = (result.chars_saved, result.blocks_replaced) if result.mutated else (0, 0)
+    record = {
+        "prompt_dedup_mode": result.mode,
+        "prompt_dedup_class": result.prompt_dedup_class,
+        "prompt_dedup_blocks_replaced": replaced,
+        # Separated field: always present, mirrors the realized prompt-dedup save.
+        "prompt_dedup_chars_saved": saved,
+        # Aggregate total: includes prompt dedup only when a mutation occurred.
+        "chars_saved": saved,
+    }
+    _assert_no_forbidden_keys(record)
+    return record
diff --git a/docs/guides/hermes-monitor.md b/docs/guides/hermes-monitor.md
index 8d62093..b7d6c53 100644
--- a/docs/guides/hermes-monitor.md
+++ b/docs/guides/hermes-monitor.md
@@ -178,6 +178,61 @@ Use `--disable-prompt-dedup-ab` to omit this section. Even when enabled, all
 figures are **simulation-only**, **not realized savings**, and no prompt text is
 rewritten, summarized, deduplicated, or emitted.
 
+### Prompt dedup canary (runtime; default OFF)
+
+> **Use only after the A/B simulation above shows a clear, positive
+> `same_type_skill_prompt_only` delta and you have a golden eval in place.**
+> This is the one ContextPilot path that *actually rewrites prompt text*; treat
+> it as a staged canary rollout, not default behavior.
+
+Everything else in the analyzer is measurement/shadow/simulation only. The
+canary (`contextpilot.hermes_opportunities.prompt_dedup_canary`) is the single
+runtime replacement path and it is **off by default**. It is controlled entirely
+by environment variables — no config file is required:
+
+```sh
+# off (default): no scan, no mutation, no prompt-dedup savings recorded
+CONTEXTPILOT_PROMPT_DEDUP_MODE=off
+
+# shadow: measure what a canary *would* replace; payload still unchanged
+CONTEXTPILOT_PROMPT_DEDUP_MODE=shadow
+
+# canary: actually replace later exact duplicate skill-prompt blocks
+CONTEXTPILOT_PROMPT_DEDUP_MODE=canary
+```
+
+**Rollback / kill switch.** Set the mode back to `off` (or unset the variable)
+to disable immediately. The escape-hatch variable forces `off` regardless of the
+mode variable, for an instant kill without editing the mode:
+
+```sh
+CONTEXTPILOT_PROMPT_DEDUP_DISABLE=1   # forces off even if MODE=canary
+```
+
+What the canary will and will not do, even when `MODE=canary`:
+
+- It acts **only** on the `same_type_skill_prompt_only` class — an EXACT
+  duplicate block whose every occurrence is inside `skill_prompt` content.
+- It **never** replaces `system_prompt`-only duplicates, **never** replaces
+  cross-type `system_prompt`/`skill_prompt` duplicates, and **never** touches
+  user, assistant, tool, or ordinary system-prompt content.
+- The **first** occurrence is always kept verbatim; only later exact duplicates
+  are replaced, and only with a deterministic reference string containing a
+  low-cardinality prompt-type enum plus a salted hash — never raw prompt text.
+- A replacement happens only when the reference string is **strictly shorter**
+  than the line it replaces, so the payload is never grown.
+- A broad **safety denylist** (instruction / safety / security / tool / auth /
+  secret / must / never / always / required / ...) leaves any matching block
+  unchanged even in canary mode. Skill-prompt detection is conservative: if a
+  block is not clearly a skill-prompt duplicate, it is left as-is.
+
+Telemetry is metadata-only: `prompt_dedup_mode`, `prompt_dedup_class`,
+`prompt_dedup_blocks_replaced`, and `prompt_dedup_chars_saved` (mode/class enums
+and integer counters only — no prompt text). The realized `prompt_dedup_chars_saved`
+and its contribution to the aggregate `chars_saved` total are non-zero **only
+when a real canary mutation occurred**; `off` and `shadow` record no prompt-dedup
+savings.
+
 ### Worker Context Routing shadow mode
 
 The analyzer now includes a **Worker Context Routing — shadow mode** section by
diff --git a/tests/test_hermes_plugin_patch.py b/tests/test_hermes_plugin_patch.py
index d328d01..6909f47 100644
--- a/tests/test_hermes_plugin_patch.py
+++ b/tests/test_hermes_plugin_patch.py
@@ -364,3 +364,116 @@ def test_optimize_survives_unwritable_telemetry_path(monkeypatch, tmp_path):
     out, stats = engine.optimize_api_messages(messages)
     assert out[1]["content"] == "REF"
     assert stats["chars_saved"] > 0
+
+
+def _zero_dedup(body, **kwargs):
+    # Stand-in for dedup_chat_completions: ignores its arguments and reports
+    # zero for every block-level dedup counter.
+    zero_counters = dict.fromkeys(
+        ("chars_saved", "blocks_deduped", "blocks_total", "system_blocks_matched"), 0
+    )
+    return SimpleNamespace(**zero_counters)
+
+
+def test_prompt_dedup_canary_default_off_does_not_mutate_runtime(monkeypatch, tmp_path):
+    module, _ = _load_plugin_module(monkeypatch)
+    monkeypatch.setattr(module, "_check_reorder", lambda: False)
+    monkeypatch.setattr(module, "_CONTEXTPILOT_AVAILABLE", False)
+    monkeypatch.setattr(module, "dedup_chat_completions", _zero_dedup)
+    telemetry = tmp_path / "telemetry.jsonl"
+    monkeypatch.setenv("CONTEXTPILOT_TELEMETRY_FILE", str(telemetry))
+    monkeypatch.delenv("CONTEXTPILOT_PROMPT_DEDUP_MODE", raising=False)  # unset => off
+
+    repeated = (
+        "Reusable examples paragraph for skill notes with enough descriptive filler "
+        "to make the reference shorter than the duplicate body in this test."
+    )
+    content = f"Use this skill when testing.\n{repeated}\n{repeated}"  # skill-like lead-in
+    engine = module.ContextPilotEngine()
+    out, stats = engine.optimize_api_messages([{"role": "system", "content": content}])
+
+    assert out[0]["content"] == content  # duplicate line left in place
+    assert stats["prompt_dedup_mode"] == "off"
+    assert stats["prompt_dedup_chars_saved"] == 0
+    assert not telemetry.exists()  # no telemetry file is written in this scenario
+
+
+def test_prompt_dedup_canary_mutates_only_skill_prompt_runtime(monkeypatch, tmp_path):
+    """Canary mode rewrites only the skill-like system message; telemetry has no text."""
+    import json
+
+    module, _ = _load_plugin_module(monkeypatch)
+    monkeypatch.setattr(module, "_check_reorder", lambda: False)
+    monkeypatch.setattr(module, "_CONTEXTPILOT_AVAILABLE", False)
+    monkeypatch.setattr(module, "dedup_chat_completions", _zero_dedup)
+    monkeypatch.setenv("CONTEXTPILOT_PROMPT_DEDUP_MODE", "canary")
+    monkeypatch.delenv("CONTEXTPILOT_PROMPT_DEDUP_DISABLE", raising=False)  # kill switch off
+    telemetry = tmp_path / "telemetry.jsonl"
+    monkeypatch.setenv("CONTEXTPILOT_TELEMETRY_FILE", str(telemetry))
+
+    repeated = (
+        "Reusable examples paragraph for skill notes with enough descriptive filler "
+        "to make the reference shorter than the duplicate body in this test."
+    )
+    skill_content = f"Use this skill when testing.\n{repeated}\n{repeated}"
+    ordinary_system = "ordinary system heading\nordinary system text stays untouched"
+    user_content = f"{repeated}\n{repeated}"
+
+    engine = module.ContextPilotEngine()
+    out, stats = engine.optimize_api_messages(
+        [
+            {"role": "system", "content": skill_content},
+            {"role": "system", "content": ordinary_system},
+            {"role": "user", "content": user_content},
+        ]
+    )
+
+    assert out[0]["content"].count(repeated) == 1  # first occurrence kept, later one replaced
+    assert "ContextPilot dedup: duplicate skill_prompt block omitted" in out[0]["content"]
+    # Ordinary system and user content are untouched.
+    assert out[1]["content"] == ordinary_system
+    assert out[2]["content"] == user_content
+    assert stats["prompt_dedup_mode"] == "canary"
+    assert stats["prompt_dedup_blocks_replaced"] == 1
+    assert stats["prompt_dedup_chars_saved"] > 0
+    assert stats["chars_saved"] == stats["prompt_dedup_chars_saved"]
+
+    record = json.loads(telemetry.read_text(encoding="utf-8").splitlines()[0])
+    assert record["prompt_dedup_mode"] == "canary"
+    assert record["prompt_dedup_class"] == "same_type_skill_prompt_only"
+    assert record["prompt_dedup_blocks_replaced"] == 1
+    assert record["prompt_dedup_chars_saved"] == stats["prompt_dedup_chars_saved"]
+    raw = telemetry.read_text(encoding="utf-8")
+    assert repeated not in raw and "Use this skill" not in raw  # metadata-only telemetry
+
+
+def test_prompt_dedup_canary_does_not_replace_cross_type_or_denylisted_runtime(monkeypatch):
+    module, _ = _load_plugin_module(monkeypatch)
+    monkeypatch.setattr(module, "_check_reorder", lambda: False)
+    monkeypatch.setattr(module, "_CONTEXTPILOT_AVAILABLE", False)
+    monkeypatch.setattr(module, "dedup_chat_completions", _zero_dedup)
+    monkeypatch.setenv("CONTEXTPILOT_PROMPT_DEDUP_MODE", "canary")
+    monkeypatch.delenv("CONTEXTPILOT_PROMPT_DEDUP_DISABLE", raising=False)  # kill switch off
+
+    cross = (
+        "Shared examples paragraph across prompts with enough descriptive filler "
+        "to be tempting but cross hierarchy should stay unchanged."
+    )
+    denied = (
+        "This duplicate line contains secret handling details and enough filler "
+        "to be long but should be blocked by denylist."
+    )
+    skill_content = f"Use this skill when testing.\n{cross}\n{denied}\n{denied}"
+    ordinary_system = f"ordinary system heading\n{cross}"
+
+    messages = [
+        {"role": "system", "content": skill_content},
+        {"role": "system", "content": ordinary_system},
+    ]
+    out, stats = module.ContextPilotEngine().optimize_api_messages(messages)
+
+    assert stats["prompt_dedup_mode"] == "canary"  # guards against a vacuous pass
+    assert out[0]["content"] == skill_content
+    assert out[1]["content"] == ordinary_system
+    assert stats["prompt_dedup_chars_saved"] == 0
+    assert stats["prompt_dedup_blocks_replaced"] == 0
diff --git a/tests/test_prompt_dedup_canary.py b/tests/test_prompt_dedup_canary.py
new file mode 100644
index 0000000..6bf8d91
--- /dev/null
+++ b/tests/test_prompt_dedup_canary.py
@@ -0,0 +1,304 @@
+"""Tests for the default-off prompt-dedup canary (runtime prompt mutation).
+
+The canary is the only place ContextPilot may actually rewrite prompt text. These
+tests pin the safety gate: default off (no mutation), canary touches ONLY
+same_type_skill_prompt_only duplicates (first occurrence kept), never touches
+system/cross-type/user/assistant/tool content, honours the safety denylist and
+the escape-hatch kill switch, never grows the payload, and emits metadata-only
+telemetry with no raw prompt text.
+"""
+import json
+
+from contextpilot.hermes_opportunities import (
+    CANARY_DEDUP_CLASS,
+    PROMPT_DEDUP_DISABLE_ENV,
+    PROMPT_DEDUP_MODE_ENV,
+    apply_prompt_dedup_canary,
+    build_canary_telemetry_record,
+    resolve_prompt_dedup_mode,
+    _LLMContent,
+)
+from contextpilot.hermes_opportunities.prompt_dedup_canary import (
+    _reference_string,
+    _salted_hash,
+)
+
+SALT = "test-salt"  # passed as salt= to this module's canary calls and to _salted_hash
+MIN = 40  # passed as min_block_chars= to this module's canary calls
+
+# A benign skill block, comfortably longer than the reference placeholder so a
+# replacement actually saves characters, and free of any denylist keyword.
+LONG_SKILL = (
+    "Example reusable skill paragraph describing how the helper reformats "
+    "markdown tables into neat aligned columns for the reader."
+)
+SYS_BLOCK = (  # single-line fixture used as system_prompt content in this module
+    "Plain system narration paragraph describing the assistant persona and the "
+    "general tone it should adopt across replies."
+)
+
+
+def _ref_len() -> int:  # length of the reference string built for LONG_SKILL under SALT
+    return len(_reference_string("skill_prompt", _salted_hash(LONG_SKILL, SALT)))
+
+
+# ---------------------------------------------------------------------------
+# Mode resolution + escape hatch
+# ---------------------------------------------------------------------------
+
+
+def test_mode_defaults_to_off(monkeypatch):
+    for env_name in (PROMPT_DEDUP_MODE_ENV, PROMPT_DEDUP_DISABLE_ENV):
+        monkeypatch.delenv(env_name, raising=False)  # fine if it was never set
+    assert resolve_prompt_dedup_mode() == "off"
+
+
+def test_mode_reads_env_values():
+    """Known modes are accepted (upper case too); unknown values resolve to "off"."""
+    expected_by_raw = {"shadow": "shadow", "CANARY": "canary", "aggressive": "off"}
+    for raw, expected in expected_by_raw.items():
+        assert resolve_prompt_dedup_mode({PROMPT_DEDUP_MODE_ENV: raw}) == expected
+
+
+def test_disable_env_is_a_kill_switch():
+    overrides = {PROMPT_DEDUP_MODE_ENV: "canary", PROMPT_DEDUP_DISABLE_ENV: "1"}
+    assert resolve_prompt_dedup_mode(overrides) == "off"  # disable flag beats canary
+
+
+# ---------------------------------------------------------------------------
+# Default off never mutates
+# ---------------------------------------------------------------------------
+
+
+def test_default_off_does_not_change_payload(monkeypatch):
+    """With neither env var set, the canary reports "off" and mutates nothing."""
+    for env_name in (PROMPT_DEDUP_MODE_ENV, PROMPT_DEDUP_DISABLE_ENV):
+        monkeypatch.delenv(env_name, raising=False)
+    item = _LLMContent("skill_prompt", "\n".join([LONG_SKILL] * 3))
+    snapshot = item.content
+    outcome = apply_prompt_dedup_canary([item], salt=SALT, min_block_chars=MIN)
+    assert outcome.mode == "off"
+    assert outcome.mutated is False
+    assert (outcome.blocks_replaced, outcome.chars_saved) == (0, 0)
+    assert item.content == snapshot  # payload byte-identical
+
+
+def test_disable_env_blocks_mutation_even_with_canary_set(monkeypatch):
+    """The disable env var overrides a canary request made via the mode env var."""
+    monkeypatch.setenv(PROMPT_DEDUP_MODE_ENV, "canary")
+    monkeypatch.setenv(PROMPT_DEDUP_DISABLE_ENV, "true")
+    item = _LLMContent("skill_prompt", "\n".join((LONG_SKILL, LONG_SKILL)))
+    snapshot = item.content
+    outcome = apply_prompt_dedup_canary([item], salt=SALT, min_block_chars=MIN)
+    assert (outcome.mode, item.content) == ("off", snapshot)
+
+
+# ---------------------------------------------------------------------------
+# Canary replaces only same_type_skill_prompt_only duplicates
+# ---------------------------------------------------------------------------
+
+
+def test_canary_replaces_later_skill_duplicates_keeps_first():
+    """Of three identical skill lines, only the second and third become references."""
+    item = _LLMContent("skill_prompt", "\n".join([LONG_SKILL] * 3))
+    outcome = apply_prompt_dedup_canary(
+        [item], salt=SALT, min_block_chars=MIN, mode="canary"
+    )
+    kept, second, third = item.content.split("\n")[:3]
+    assert kept == LONG_SKILL  # first occurrence kept verbatim
+    assert LONG_SKILL not in (second, third)  # later ones replaced
+    assert second == third  # deterministic reference string
+    assert outcome.mode == "canary" and outcome.mutated is True
+    assert outcome.blocks_replaced == 2
+    assert outcome.prompt_dedup_class == CANARY_DEDUP_CLASS
+    assert outcome.chars_saved == 2 * (len(LONG_SKILL) - _ref_len())
+
+
+def test_canary_replacement_carries_no_raw_content():
+    """The reference line names the content type but never embeds the block text."""
+    item = _LLMContent("skill_prompt", "\n".join((LONG_SKILL, LONG_SKILL)))
+    apply_prompt_dedup_canary([item], salt=SALT, min_block_chars=MIN, mode="canary")
+    placeholder = item.content.split("\n", 2)[1]
+    assert "skill_prompt" in placeholder
+    assert LONG_SKILL not in placeholder
+
+
+def test_canary_replicates_across_two_skill_items():
+    """The same block in two skill_prompt items is kept in the first one only."""
+    # NOTE: despite "replicates" in the name, this checks deduplication across
+    # items: nothing is copied, the occurrence in the second item is replaced.
+    first, second = (_LLMContent("skill_prompt", LONG_SKILL) for _ in range(2))
+    outcome = apply_prompt_dedup_canary(
+        [first, second], salt=SALT, min_block_chars=MIN, mode="canary"
+    )
+    assert first.content == LONG_SKILL
+    assert second.content != LONG_SKILL
+    assert outcome.blocks_replaced == 1
+
+
+# ---------------------------------------------------------------------------
+# Canary must NOT touch other classes / roles
+# ---------------------------------------------------------------------------
+
+
+def test_canary_leaves_system_only_duplicates_untouched():
+    """Duplicates inside a system_prompt item are neither candidates nor replaced."""
+    item = _LLMContent("system_prompt", "\n".join([SYS_BLOCK] * 3))
+    snapshot = item.content
+    outcome = apply_prompt_dedup_canary(
+        [item], salt=SALT, min_block_chars=MIN, mode="canary"
+    )
+    assert item.content == snapshot
+    assert (outcome.blocks_replaced, outcome.candidate_block_count) == (0, 0)
+
+
+def test_canary_leaves_cross_type_duplicates_untouched():
+    """A block present in both a system and a skill prompt is never replaced."""
+    skill_item = _LLMContent("skill_prompt", "\n".join((LONG_SKILL, LONG_SKILL)))
+    system_item = _LLMContent("system_prompt", LONG_SKILL)
+    snapshot = skill_item.content
+    outcome = apply_prompt_dedup_canary(
+        [skill_item, system_item], salt=SALT, min_block_chars=MIN, mode="canary"
+    )
+    # Cross-type blocks are not eligible for the skill-only canary at all.
+    assert skill_item.content == snapshot
+    assert outcome.blocks_replaced == 0
+    assert outcome.candidate_block_count == 0
+
+
+def test_canary_leaves_user_assistant_tool_untouched():
+    """User, assistant and tool items keep their duplicated text in canary mode."""
+    doubled = "\n".join((LONG_SKILL, LONG_SKILL))
+    items = [
+        _LLMContent(kind, doubled)
+        for kind in ("user_prompt", "assistant_context", "tool_result")
+    ]
+    snapshots = [entry.content for entry in items]
+    outcome = apply_prompt_dedup_canary(
+        items, salt=SALT, min_block_chars=MIN, mode="canary"
+    )
+    assert [entry.content for entry in items] == snapshots
+    assert outcome.blocks_replaced == 0
+    assert outcome.item_count == 0  # non system/skill items are not even scanned
+
+
+# ---------------------------------------------------------------------------
+# Safety denylist + payload-growth guard
+# ---------------------------------------------------------------------------
+
+
+def test_denylisted_blocks_are_not_replaced():
+    """A tripled skill line that hits the safety denylist is counted, not replaced."""
+    guarded = (
+        "You must always follow this required safety rule precisely and never "
+        "skip it under any circumstances whatsoever here."
+    )
+    item = _LLMContent("skill_prompt", "\n".join([guarded] * 3))
+    snapshot = item.content
+    outcome = apply_prompt_dedup_canary(
+        [item], salt=SALT, min_block_chars=MIN, mode="canary"
+    )
+    assert item.content == snapshot  # untouched
+    assert (outcome.blocks_replaced, outcome.denylisted_block_count) == (0, 1)
+
+
+def test_canary_never_grows_payload_for_short_duplicates():
+    """Duplicates shorter than the placeholder stay put: replacing would grow them."""
+    # The fixture is longer than MIN (40) yet shorter than the reference string.
+    stub = "Short but over forty chars skill helper line."
+    assert len(stub) < _ref_len()  # precondition on the fixture itself
+    item = _LLMContent("skill_prompt", "\n".join([stub] * 3))
+    snapshot = item.content
+    outcome = apply_prompt_dedup_canary(
+        [item], salt=SALT, min_block_chars=MIN, mode="canary"
+    )
+    assert item.content == snapshot
+    assert outcome.blocks_replaced == 0
+    assert outcome.chars_saved == 0
+
+
+# ---------------------------------------------------------------------------
+# Shadow mode measures but does not mutate
+# ---------------------------------------------------------------------------
+
+
+def test_shadow_measures_candidates_without_mutating():
+    """Shadow mode reports candidate counts but leaves the content unchanged."""
+    item = _LLMContent("skill_prompt", "\n".join([LONG_SKILL] * 3))
+    snapshot = item.content
+    outcome = apply_prompt_dedup_canary(
+        [item], salt=SALT, min_block_chars=MIN, mode="shadow"
+    )
+    assert item.content == snapshot  # never mutated
+    assert outcome.mode == "shadow" and outcome.mutated is False
+    assert outcome.blocks_replaced == 0
+    assert outcome.candidate_block_count == 1
+    assert outcome.candidate_chars == 2 * len(LONG_SKILL)
+
+
+# ---------------------------------------------------------------------------
+# Telemetry: metadata-only, savings only when a real mutation happened
+# ---------------------------------------------------------------------------
+
+
+def test_telemetry_records_no_savings_when_off():
+    """With mode "off" the telemetry record reports zero for every savings counter."""
+    items = [_LLMContent("skill_prompt", "\n".join((LONG_SKILL, LONG_SKILL)))]
+    record = build_canary_telemetry_record(
+        apply_prompt_dedup_canary(items, salt=SALT, min_block_chars=MIN, mode="off")
+    )
+    assert record["prompt_dedup_mode"] == "off"
+    assert record["prompt_dedup_chars_saved"] == 0
+    assert record["prompt_dedup_blocks_replaced"] == 0
+    assert record["chars_saved"] == 0
+
+
+def test_telemetry_records_no_savings_in_shadow():
+    """A shadow run is labelled "shadow" in telemetry and reports no savings."""
+    items = [_LLMContent("skill_prompt", "\n".join((LONG_SKILL, LONG_SKILL)))]
+    record = build_canary_telemetry_record(
+        apply_prompt_dedup_canary(items, salt=SALT, min_block_chars=MIN, mode="shadow")
+    )
+    assert record["prompt_dedup_mode"] == "shadow"
+    # Shadow contributes nothing to the realized chars_saved total, nor to the
+    # prompt-dedup specific counter.
+    assert (record["chars_saved"], record["prompt_dedup_chars_saved"]) == (0, 0)
+
+
+def test_telemetry_records_realized_savings_in_canary():
+    """A canary run that replaced two blocks reports their savings in telemetry."""
+    items = [_LLMContent("skill_prompt", "\n".join([LONG_SKILL] * 3))]
+    record = build_canary_telemetry_record(
+        apply_prompt_dedup_canary(items, salt=SALT, min_block_chars=MIN, mode="canary")
+    )
+    per_block_saving = len(LONG_SKILL) - _ref_len()
+    assert record["prompt_dedup_mode"] == "canary"
+    assert record["prompt_dedup_class"] == CANARY_DEDUP_CLASS
+    assert record["prompt_dedup_blocks_replaced"] == 2
+    assert record["prompt_dedup_chars_saved"] == 2 * per_block_saving
+    # The aggregate counter includes prompt dedup only because a mutation occurred.
+    assert record["chars_saved"] == 2 * per_block_saving
+
+
+def test_telemetry_is_metadata_only_no_prompt_text():
+    """Telemetry carries enums and integer counters only, never prompt text.
+
+    The JSON-serialized record must not contain the skill block, its key set
+    must be exactly the five expected fields, and each counter must be an int.
+    """
+    items = [_LLMContent("skill_prompt", "\n".join((LONG_SKILL, LONG_SKILL)))]
+    record = build_canary_telemetry_record(
+        apply_prompt_dedup_canary(items, salt=SALT, min_block_chars=MIN, mode="canary")
+    )
+    blob = json.dumps(record)
+    assert LONG_SKILL not in blob
+    enum_keys = ("prompt_dedup_mode", "prompt_dedup_class")
+    counter_keys = (
+        "prompt_dedup_blocks_replaced",
+        "prompt_dedup_chars_saved",
+        "chars_saved",
+    )
+    # Only low-cardinality enums + integer counters are present.
+    assert set(record) == {*enum_keys, *counter_keys}
+    for counter in counter_keys:
+        assert isinstance(record[counter], int)