From aefb12ab24d3faf3b861c7abec89767c2b2661c2 Mon Sep 17 00:00:00 2001
From: Naitik Soni <91239827+naitik-mixpanel@users.noreply.github.com>
Date: Wed, 10 Jun 2026 16:01:47 +0530
Subject: [PATCH] new skill: monitor-metrics

---
 .../.fuse_hidden0000000700000001              | 459 +++++++++++++++++
 .../.fuse_hidden0000000d00000006              | 459 +++++++++++++++++
 .../.fuse_hidden0000000e00000007              | 459 +++++++++++++++++
 .../.fuse_hidden0000000f00000008              | 459 +++++++++++++++++
 .../.fuse_hidden0000001000000009              | 459 +++++++++++++++++
 .../skills/monitor-metrics/SKILL.md           | 462 +++++++++++++++++
 .../commands/metric-anomaly.md                | 236 +++++++++
 .../monitor-metrics/commands/metric-drift.md  | 319 ++++++++++++
 .../monitor-metrics/commands/metric-rca.md    | 484 ++++++++++++++++++
 .../.fuse_hidden0000000700000001              | 459 +++++++++++++++++
 .../.fuse_hidden0000000800000002              | 459 +++++++++++++++++
 .../.fuse_hidden0000000900000003              | 459 +++++++++++++++++
 .../.fuse_hidden0000000a00000004              | 459 +++++++++++++++++
 .../skills/monitor-metrics/SKILL.md           | 462 +++++++++++++++++
 .../commands/metric-anomaly.md                | 236 +++++++++
 .../monitor-metrics/commands/metric-drift.md  | 319 ++++++++++++
 .../monitor-metrics/commands/metric-rca.md    | 484 ++++++++++++++++++
 .../.fuse_hidden0000000700000001              | 459 +++++++++++++++++
 .../.fuse_hidden0000000700000002              | 459 +++++++++++++++++
 .../.fuse_hidden0000000800000003              | 459 +++++++++++++++++
 .../.fuse_hidden0000000900000004              | 459 +++++++++++++++++
 .../.fuse_hidden0000000a00000005              | 459 +++++++++++++++++
 .../skills/monitor-metrics/SKILL.md           | 462 +++++++++++++++++
 .../commands/metric-anomaly.md                | 242 +++++++++
 .../monitor-metrics/commands/metric-drift.md  | 319 ++++++++++++
 .../monitor-metrics/commands/metric-rca.md    | 484 ++++++++++++++++++
 26 files changed, 10935 insertions(+)
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000700000001
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000d00000006
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000e00000007
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000f00000008
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000001000000009
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/SKILL.md
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-anomaly.md
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-drift.md
 create mode 100644 plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-rca.md
 create mode 100644 plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000700000001
 create mode 100644 plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000800000002
 create mode 100644 plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000900000003
 create mode 100644 plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000a00000004
 create mode 100644 plugins/mixpanel-mcp-in/skills/monitor-metrics/SKILL.md
 create mode 100644 plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-anomaly.md
 create mode 100644 plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-drift.md
 create mode 100644 plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-rca.md
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000700000001
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000700000002
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000800000003
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000900000004
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000a00000005
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/SKILL.md
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-anomaly.md
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-drift.md
 create mode 100644 plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-rca.md

diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000700000001 b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000700000001
new file mode 100644
index 0000000..11a1684
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000700000001
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires Mixpanel MCP.
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the Mixpanel MCP. Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `Mixpanel MCP:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `Mixpanel MCP:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `Mixpanel MCP:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `Mixpanel MCP:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `Mixpanel MCP:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once, scoped to the events used by `query_template`
+(`event_name=<event>` for each), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift, to cover its
+weekly window; 30 days back for anomaly). If issues exist (type drift,
+null spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly`, `metric-drift`) produce a structured
+verdict, not a data dump. Each defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (the anomaly run was skipped and redirected to `metric-drift`, so there is nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — no literal newline
+     characters; each element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000d00000006 b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000d00000006
new file mode 100644
index 0000000..11a1684
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000d00000006
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires Mixpanel MCP.
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the Mixpanel MCP. Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `Mixpanel MCP:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `Mixpanel MCP:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `Mixpanel MCP:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `Mixpanel MCP:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `Mixpanel MCP:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest date the
+diagnosis will look at (16 weeks back for drift, to cover its weekly
+view; 30 days back for anomaly). If issues exist (type drift, null
+spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly` and `metric-drift`) produce a
+structured verdict, not a data dump. Each defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (the run was skipped and handed off to `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — do not insert newline
+     characters; each element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000e00000007 b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000e00000007
new file mode 100644
index 0000000..aaa9bc7
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000e00000007
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires Mixpanel MCP.
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the Mixpanel MCP. Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp-eu:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp-eu:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp-eu:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp-eu:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp-eu:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest date the
+diagnosis will look at (16 weeks back for drift, to cover its weekly
+view; 30 days back for anomaly). If issues exist (type drift, null
+spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly` and `metric-drift`) produce a structured
+verdict, not a data dump. Each defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (anomaly detection was skipped and handed off to `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — no raw newline characters;
+     each HTML element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000f00000008 b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000f00000008
new file mode 100644
index 0000000..75bf536
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000000f00000008
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp-eu` connector (Mixpanel EU).
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp-eu` connector (Mixpanel EU). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp-eu:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp-eu:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp-eu:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp-eu:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp-eu:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest date the
+diagnosis will look at (16 weeks back for drift, to cover its weekly
+view; 30 days back for anomaly). If issues exist (type drift, null
+spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly` and `metric-drift`) produce a structured
+verdict, not a data dump. Each defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (anomaly detection was skipped and handed off to `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — no raw newline characters;
+     each HTML element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000001000000009 b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000001000000009
new file mode 100644
index 0000000..cf21368
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/.fuse_hidden0000001000000009
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp-eu` connector (Mixpanel EU).
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp-eu` connector (Mixpanel EU). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheap (2 queries over short windows)
+  and catches point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both detection commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp-eu:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp-eu:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp-eu:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp-eu:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both detection commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp-eu:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest date the
+diagnosis will look at (16 weeks back for drift, to cover its weekly
+view; 30 days back for anomaly). If issues exist (type drift, null
+spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both commands produce a structured verdict, not a data dump. The commands
+define their own output formats; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both detection commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (anomaly detection is skipped in favor of `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — no literal newline
+     characters; each element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/SKILL.md b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/SKILL.md
new file mode 100644
index 0000000..acf4362
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/SKILL.md
@@ -0,0 +1,462 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp-eu` connector (Mixpanel EU).
+---
+
+# Monitor Metrics
+
+> **Connector:** This skill operates exclusively against the `mixpanel-mcp-eu` connector (Mixpanel EU region). Every Mixpanel MCP tool call in this SKILL.md and in every file under `commands/` must be routed through `mixpanel-mcp-eu` — never any other Mixpanel connector.
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp-eu` connector (Mixpanel EU). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheap (2 queries over short windows)
+  and catches point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both detection commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp-eu:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp-eu:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp-eu:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp-eu:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both detection commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp-eu:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `List-Properties` with
+   `names=[<filter_property>]` and `resource_type=<Event|User>` (pass
+   `events=[<event>]` to scope to a specific event's properties). If it
+   doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once, scoped to the events used by `query_template`
+(`event_name=<event>` for each), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift — its weekly view
+spans 16 weeks; 30 days back for anomaly). If issues exist (type drift, null spikes, schema changes) in
+that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both commands produce a structured verdict, not a data dump. The commands
+define their own output formats; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as an extra `diagnosis_board_id`
+field (not listed in the shape above; null until a board exists) so `metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (the command deferred to `metric-drift` without producing a verdict — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc.; do not use raw newline
+     characters for line breaks — each HTML element already starts a new line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-anomaly.md b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-anomaly.md
new file mode 100644
index 0000000..25530aa
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-anomaly.md
@@ -0,0 +1,236 @@
+# Command: metric-anomaly
+
+Detect point-in-time anomalies in a single metric — recent spikes, drops, and
+clusters. Produces a verdict on *whether* something unusual happened at a
+specific moment. Does **not** test for trend-level drift (run `metric-drift`
+for that).
+
+---
+
+## Prerequisites
+
+Before this command runs, Steps 0, 1, and 1.5 from `SKILL.md` must have
+completed — input validation, normalized metric series object, and project
+profile resolution. If any of those haven't happened, do them first.
+
+If the user's input is a saved report but the metric is a **funnel** or
+**retention** report, see the "Special cases" section at the bottom.
+
+### Prerequisite — classify `metric_type`
+
+Before firing any queries, classify the metric into one of:
+`count`, `unique_count`, `ratio`, `funnel`, `retention`, `unknown`.
+
+| Detected | Classification |
+|---|---|
+| Report type `funnels` | `funnel` |
+| Report type `retention` | `retention` |
+| Query template has A/B form or `% of total` (conversion rate, session rate, etc.) | `ratio` |
+| Single-series count (event count, event count distinct users) | `count` |
+| Single-series unique count | `unique_count` |
+| Formula metric / custom SQL / anything else | `unknown` |
+
+Store as `metric_type` on the metric series object. Used in the verdict card
+and in special-case routing (funnel, retention).
+
+> _Keep this classification table in sync with the identical block in
+> `metric-drift.md` — edits to one must be mirrored in the other._
+
+---
+
+## Phase 1 — Fetch series (2 queries, parallel)
+
+Fire both `Run-Query` calls simultaneously:
+
+| Query | Window | Granularity | Purpose |
+|---|---|---|---|
+| Q1-hourly | Last 7 days | `hour` | Recent-blip detection |
+| Q1-daily | Last 30 days | `day` | Recent-day detection against a fuller baseline |
+
+Use the `query_template` from the metric object; override only `dateRange`
+and `unit` (granularity). Do not re-apply filters — they're already baked in.
+
+Build the `Run-Query` body from `query_template` with only `dateRange` and
+`unit` (granularity) overridden. Use `timeComparison` when a single call can
+cover both windows.
+
+---
+
+## Phase 2 — Outlier tests (Z-score + IQR, time-bucketed)
+
+For each series independently, compute the expected range at every timestamp.
+Run **both** tests; flag a point if **either** test flags it. Report which
+test(s) caught each flag.
+
+### Test 1 — Z-score against time-bucketed mean
+
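+- For the **hourly** series: group all points by hour-of-day (0–23; 24 buckets of ~7 points each — also splitting by day-of-week would leave one point per bucket over a 7-day window). For each point, compute mean (μ) and stddev (σ) from the *other* points in its bucket (leave-one-out). Flag any point where `|value - μ| / σ > 2.5`.
+- For the **daily** series: group by day-of-week (7 buckets of 4–5 points each). Compute leave-one-out μ and σ the same way across the 30-day window. Flag any point where `|value - μ| / σ > 2.5`. (Leave-one-out matters: with n points in a bucket, a point included in its own μ and σ can never score above √(n−1), which is below 2.5 for n ≤ 7.)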
+- Handle low-variance buckets: if σ is <5% of μ, skip the Z-score for that bucket and fall back to IQR only (division by tiny σ creates false alarms).
+
+### Test 2 — IQR against time-bucketed median
+
+- Same bucketing scheme as Test 1.
+- For each bucket, compute Q1, median, Q3, and IQR = Q3 − Q1.
+- Flag any point where `value < Q1 − 1.5 × IQR` or `value > Q3 + 1.5 × IQR`.
+
+### Deviation magnitude
+
+For every flagged point, report `(value − median) / median` as a signed
+percentage, where `median` is the median of that point's time bucket (as computed in Test 2). This is what the CSA actually cares about, not the Z-score itself.
+
+### Classify each flagged timestamp
+
+- **Isolated spike/drop** — one point flagged, neighbors normal. Most likely a real anomaly (outage, release, data gap).
+- **Cluster** — 2+ consecutive points flagged in the same direction. Could be a short incident *or* the leading edge of drift. Flag as ambiguous and note that `metric-drift` may be a better follow-up.
+- **Edge-of-window cluster** — flagged points are the most recent N points. Strongly suggestive of drift, not anomaly. Recommend running `metric-drift` before treating as an anomaly incident.
+
+---
+
+## Phase 3 — Summarise + charts + handoff
+
+Produces **three things**, in order:
+
+1. **A single visualizer widget with two charts stacked vertically**
+2. **A compact verdict card**
+3. **A diagnosis payload** handed back to the skill-level flow (Step 2 in
+   `SKILL.md`) for the board prompt and `metric-rca` caching
+
+### The charts — always rendered
+
+Both charts render regardless of whether anything was flagged. A stable chart
+is the visual proof of stability and saves the CSA from second-guessing.
+
+**Top chart: 7-day hourly view** (Q1-hourly series)
+- Line for the hourly series.
+- Dots for every flagged hourly point — red for drops, amber for spikes. Omit entirely if no flags.
+- Label the most recent flagged point inline with timestamp and deviation %.
+- Title: `<metric_name> — last 7 days, hourly`.
+
+**Bottom chart: 30-day daily view** (Q1-daily series)
+- Line for the daily series.
+- Dots for every flagged daily point — red for drops, amber for spikes. Omit entirely if no flags.
+- Label the most recent flagged point inline with timestamp and deviation %.
+- Title: `<metric_name> — last 30 days, daily`.
+
+Both charts share x-axis type (date/time) but not range — render as two
+separate plots in one widget, stacked, with consistent y-axis formatting.
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]` once if
+not already loaded this session. Do not narrate the read_me call to the user.
+
+If chart generation fails, fall back to card-only output with the note
+"Chart unavailable — card below." Do not block on the chart.
+
+### The compact verdict card
+
+```
+METRIC: <metric_name> — <project_id>
+DEFINITION: <one-sentence what-it-measures>
+
+━━ ANOMALY VERDICT ━━
+Hourly series (7d):  <Clean | N flagged | Edge cluster — possible drift>
+Daily series (30d):  <Clean | N flagged | Edge cluster — possible drift>
+
+━━ TOP FLAGS ━━
+<timestamp>  <value>  <deviation %>  [isolated | cluster | edge]  (z-score | IQR | both)
+<timestamp>  <value>  <deviation %>  [isolated | cluster | edge]  (z-score | IQR | both)
+... (cap 5; omit section entirely if no flags)
+
+━━ HEADLINE ━━
+<one sentence the CSA could paste into a customer Slack>
+
+━━ CONFIDENCE ━━
+<high | medium | low> — <reason for any hedge>
+
+━━ NEXT STEP ━━
+<one concrete action>
+
+━━ WHAT THIS ISN'T ━━
+This is point-in-time anomaly detection only. Trend-level drift is not
+tested here — run `metric-drift` for that.
+```
+
+#### Headline phrasing discipline
+
+- No flags: "Metric is stable at the point-in-time level — no anomalies in the last 7 or 30 days."
+- Isolated flag(s): "Metric had a [spike/drop] of X% on [date]. Baseline otherwise stable."
+- Cluster or edge cluster: "Metric has [N] anomalies concentrated in the last [window] — likely the leading edge of drift. Recommend running `metric-drift` next."
+
+Never lead with a confidence hedge. State the finding, then qualify it.
+
+If >10 flags total across both series, keep the TOP FLAGS list capped at 5
+entries, sorted by deviation magnitude descending, and add a note to the headline:
+"<N> anomalies flagged in the last <window> — the metric is either undergoing a
+regime shift or the baseline model is wrong. Run `metric-drift` before
+treating any single point as actionable."
+
+### The diagnosis payload
+
+After rendering the charts and verdict card, assemble the payload defined
+in `SKILL.md` Step 2 and hand it back to the skill-level flow:
+
+```
+{
+  command: "metric-anomaly",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  queries: [
+    { label: "Q1-hourly", window: "last 7 days", granularity: "hour",
+      run_query_body: <body used>, result: <series> },
+    { label: "Q1-daily",  window: "last 30 days", granularity: "day",
+      run_query_body: <body used>, result: <series> }
+  ],
+  verdict_card: <full rendered card above>,
+  headline: <the HEADLINE line from the card>,
+  flags: {
+    hourly: [ { timestamp, value, deviation_pct, classification, test } , ... ],
+    daily:  [ { timestamp, value, deviation_pct, classification, test } , ... ]
+  }
+}
+```
+
+The skill-level flow (Step 2 in `SKILL.md`) then asks the user about the
+board and caches the payload for `metric-rca`. Do **not** ask the board
+question from inside this command — that lives at the skill level so a
+user running anomaly → drift back-to-back gets asked once at the end,
+not twice.
+
+---
+
+## Special cases
+
+**Funnel metrics:** The hourly view is usually too noisy for a multi-step
+funnel at low volume. Drop Q1-hourly and run Q1-daily only (last 14 days
+instead of 30 to stay lightweight). Note in output: "Hourly anomaly detection
+skipped — funnel volume too low at hourly granularity."
+
+**Retention metrics:** Retention is a rolling cohort metric — point-in-time
+anomaly detection mostly doesn't apply. Tell the user directly and recommend
+`metric-drift` instead, which has a cohort-over-cohort fallback for retention.
+
+**Very low-volume metrics (<100 events/day):** Skip Q1-hourly and run
+Q1-daily only — the Poisson noise floor dominates at hourly granularity.
+State this in the output.
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| One of the two queries fails | Retry it once. If it still fails, mark that series partial, continue with the other, and note it in the output. |
+| Both queries fail | Stop. Report the failure and ask the user to verify project access. |
+| Project requires a filter the user didn't provide | Ask once, then proceed. Don't guess. |
+| Metric returns zero events in window | Stop. The metric is either broken or the filter excludes everything. Report as a possible data quality issue; do not proceed to Phase 2. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not test for trend-level drift.** That's `metric-drift`.
+- **Does not attribute cause.** Root-cause investigation is out of scope for this command — run `metric-rca` after detection.
+- **Does not produce recommendations beyond "run drift" / "run RCA".** The verdict is the product.
+
+Keep the surface narrow. A clean anomaly verdict in under 30 seconds is more
+useful than a sprawling analysis that tries to do everything.
diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-drift.md b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-drift.md
new file mode 100644
index 0000000..12e9456
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-drift.md
@@ -0,0 +1,319 @@
+# Command: metric-drift
+
+Detect trend-level drift in a single metric — whether the baseline itself has
+shifted over recent weeks. Produces a verdict on *whether* the metric is in a
+new regime. Does **not** test for point-in-time anomalies (run `metric-anomaly`
+for that).
+
+---
+
+## Prerequisites
+
+Before this command runs, Steps 0, 1, and 1.5 from `SKILL.md` must have
+completed — input validation, normalized metric series object, and project
+profile resolution. If any of those haven't happened, do them first.
+
+If the user's input is a saved report but the metric is a **funnel** or
+**retention** report, see the "Special cases" section at the bottom.
+
+### Prerequisite — classify `metric_type`
+
+Before firing any queries, classify the metric into one of:
+`count`, `unique_count`, `ratio`, `funnel`, `retention`, `unknown`.
+
+| Detected | Classification |
+|---|---|
+| Report type `funnels` | `funnel` |
+| Report type `retention` | `retention` |
+| Query template has A/B form or `% of total` (conversion rate, session rate, etc.) | `ratio` |
+| Single-series count (event count, event count distinct users) | `count` |
+| Single-series unique count | `unique_count` |
+| Formula metric / custom SQL / anything else | `unknown` |
+
+Store as `metric_type` on the metric series object. Used in the verdict card
+and in special-case routing (funnel, retention).
+
+> _Keep this classification table in sync with the identical block in
+> `metric-anomaly.md` — edits to one must be mirrored in the other._
+
+### Prerequisite — name the drift and baseline windows
+
+The naming convention used throughout this command's output:
+
+- **`drift_window`** — the **recent** 30 days (most recent 30 days ending today).
+- **`baseline_window`** — the **prior** 30 days (30 days ending 30 days before today).
+
+Both windows are computed from Q1-daily. The weekly test uses 8 vs 8 weeks —
+those windows are reported alongside but are secondary to the daily windows
+for headline purposes.
+
+---
+
+## Phase 1 — Fetch series (2 queries, parallel)
+
+Fire both `Run-Query` calls simultaneously:
+
+| Query | Window | Granularity | Comparison |
+|---|---|---|---|
+| Q1-daily | Last 60 days | `day` | Last 30 days vs. prior 30 days |
+| Q1-weekly | Last 16 weeks | `week` | Last 8 weeks vs. prior 8 weeks |
+
+The 60-day daily view catches medium-term drift. The 16-week weekly view
+catches slow drift that the daily window would miss because daily noise
+drowns the signal. Running both is cheap and they answer different questions.
+
+Use the `query_template` from the metric object; override only `dateRange`
+and `unit` (granularity). Do not re-apply filters — they're already baked in.
+
+---
+
+## Phase 2 — Drift tests (mean shift + variance ratio)
+
+### Window split & contamination check
+
+For each series, split into `recent` and `prior` halves (no overlap).
+
+**Lightweight anomaly contamination check** (important because this command
+can run standalone without `metric-anomaly` having run first):
+
+Scan the `recent` window for obvious outliers using a simple robust rule — any
+point more than 3 robust σ from the window median (robust σ = 1.4826 × MAD; a plain mean ± 3σ rule can never flag more than ~11% of points, so it could not reach the threshold below). If ≥20% of points in the `recent` window
+qualify → flag **"drift test potentially contaminated by outliers in the
+recent window"** and mark all drift findings as low-confidence. Recommend the
+user run `metric-anomaly` first.
+
+If fewer than 20% of points qualify, proceed normally but note the count in the
+verdict card's contamination section.
+
+This is deliberately lighter than `metric-anomaly`'s full time-bucketed
+test — its job here is only to flag contamination risk, not to produce a
+publishable anomaly verdict.
+
+### Test 1 — Mean shift (level drift)
+
+```
+mean_recent  = mean(recent_window)
+mean_prior   = mean(prior_window)
+level_delta  = (mean_recent − mean_prior) / mean_prior    # signed %
+```
+
+Flag thresholds:
+- `|level_delta| < 5%` → no meaningful shift
+- `5% ≤ |level_delta| < 15%` → moderate drift
+- `|level_delta| ≥ 15%` → significant drift
+
+Additionally compute a Welch's t-test on the two windows. If p < 0.05 and
+`|level_delta| ≥ 5%`, drift is statistically supported. If p ≥ 0.05, note the
+shift is observational but not statistically distinguishable from noise.
+
+### Test 2 — Variance ratio (volatility drift)
+
+```
+var_ratio = variance(recent_window) / variance(prior_window)
+```
+
+Flag thresholds:
+- `0.67 ≤ var_ratio ≤ 1.5` → variance stable
+- `var_ratio > 1.5` → metric got noisier (investigate instrumentation, cohort mix)
+- `var_ratio < 0.67` → metric got smoother (often a sign of flatlining or saturation)
+
+Variance drift without level drift is an under-appreciated signal — the
+headline number looks fine but something structural changed. Always surface
+it separately.
+
+Distribution-shape tests (KS, PSI) are intentionally **not** part of this
+battery. They require per-user or per-segment values, which Mixpanel's MCP
+surface does not return at practical cost.
+
+### Combine into a per-series verdict
+
+| Verdict | When |
+|---|---|
+| **No drift** | Level stable AND variance stable |
+| **Level drift** | Level shifted ≥5%, variance stable |
+| **Variance drift** | Level stable, variance ratio outside 0.67–1.5 |
+| **Compound drift** | Both |
+
+Also report **direction** (up / down) and **magnitude** (% for level, ratio
+for variance).
+
+### Reconcile the two series
+
+The 60-day-daily and 16-week-weekly views should agree on direction. If they
+disagree:
+
+- **Weekly says drift, daily says none** → slow drift that daily noise hides. Trust the weekly.
+- **Daily says drift, weekly says none** → recent movement that hasn't accumulated into the weekly window yet. Could be the leading edge of real drift, or a contained incident. Trust the daily but note the weekly hasn't confirmed.
+- **Both agree** → high confidence, state it.
+
+### Classify drift shape
+
+If drift is flagged, classify its shape using the daily series for use in
+the verdict card:
+
+| Condition | `verdict_shape` value |
+|---|---|
+| Single-day change point where mean shift before vs after explains ≥60% of variance, and before/after segments are each <20% within-segment variance | `step` (record the change-point date) |
+| Linear regression fit to the full 60-day series has R² ≥ 0.5 and non-zero slope | `slope` |
+| 7-day autocorrelation on residuals ≥ 0.5, and periodicity strength differs between drift and baseline windows | `oscillating` |
+| None of the above fit cleanly | `unclassified` |
+
+**Shape precedence**: if multiple shapes fit, use this priority:
+`step` > `slope` > `oscillating` > `unclassified`. (Step changes are the
+most actionable; surface them first when ambiguous.)
+
+If no drift was flagged, skip shape classification entirely.
+
+---
+
+## Phase 3 — Summarise + charts + handoff
+
+Produces **three things**, in order:
+
+1. **A single visualizer widget with two charts stacked vertically**
+2. **A compact verdict card**
+3. **A diagnosis payload** handed back to the skill-level flow (Step 2 in
+   `SKILL.md`) for the board prompt and `metric-rca` caching
+
+### The charts — always rendered
+
+Both charts render regardless of whether drift was detected. A stable chart
+is the visual proof of stability.
+
+**Top chart: 60-day daily view** (Q1-daily series)
+- Line for the daily series.
+- **Shaded band** for the prior 30-day baseline window (subtle grey fill).
+- **Shaded band** for the recent 30-day drift window — red-tinted fill if drift is `down`, green-tinted if `up`, amber-tinted if `mixed`, grey if no drift.
+- Horizontal line for `mean_prior` (dashed grey).
+- Horizontal line for `mean_recent` (dashed, colored to match drift direction).
+- If `verdict_shape = step`, annotate the change-point date with a vertical dashed line.
+- Title: `<metric_name> — last 60 days, daily`.
+
+**Bottom chart: 16-week weekly view** (Q1-weekly series)
+- Line for the weekly series.
+- **Shaded band** for the prior 8-week baseline window (subtle grey fill).
+- **Shaded band** for the recent 8-week drift window — same direction-based coloring as above.
+- Horizontal lines for `mean_prior_weekly` (dashed grey) and `mean_recent_weekly` (dashed, colored).
+- Title: `<metric_name> — last 16 weeks, weekly`.
+
+Both charts share x-axis type (date) and consistent y-axis formatting.
+Render as two separate plots in one widget, stacked.
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]` once if
+not already loaded this session. Do not narrate the read_me call to the user.
+
+If chart generation fails, fall back to card-only output with the note
+"Chart unavailable — card below." Do not block on the chart.
+
+### The compact verdict card
+
+```
+METRIC: <metric_name> — <project_id>
+DEFINITION: <one-sentence what-it-measures>
+
+━━ DRIFT VERDICT ━━
+60-day / daily view:   <verdict>  <direction>  <magnitude>  (t-test p = <p>)
+16-week / weekly view: <verdict>  <direction>  <magnitude>
+Reconciled verdict:    <one sentence>
+Shape:                 <step | slope | oscillating | unclassified>  <change-point date if step>
+
+━━ CONTAMINATION ━━
+<none | recent window contains N outliers — drift confidence downgraded; recommend metric-anomaly first>
+
+━━ HEADLINE ━━
+<one sentence the CSA could paste into a customer Slack>
+
+━━ CONFIDENCE ━━
+<high | medium | low> — <reason for any hedge>
+
+━━ NEXT STEP ━━
+<one concrete action>
+
+━━ WHAT THIS ISN'T ━━
+This is trend-level drift detection only. Point-in-time anomalies are not
+tested here — run `metric-anomaly` for that.
+```
+
+#### Headline phrasing discipline
+
+- No drift: "Metric is stable — trend has not shifted in the last 30 days or 8 weeks."
+- Level drift: "Metric has drifted [up/down] by X% over the last 30 days. [Weekly view confirms / Weekly view hasn't confirmed yet]."
+- Variance drift only: "Metric level is stable but volatility has [increased/decreased] — variance ratio [X.XX]. Something structural changed without moving the headline."
+- Compound drift: "Metric has drifted [up/down] by X% AND volatility changed. Compound drift — investigate both level and structure."
+- Contamination flag: append "Drift confidence is low — recent window has N outlier points. Run `metric-anomaly` first to clean up before attributing."
+
+Never lead with a confidence hedge. State the finding, then qualify it.
+
+### The diagnosis payload
+
+After rendering the charts and verdict card, assemble the payload defined
+in `SKILL.md` Step 2 and hand it back to the skill-level flow:
+
+```
+{
+  command: "metric-drift",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  queries: [
+    { label: "Q1-daily",  window: "last 60 days",  granularity: "day",
+      run_query_body: <body used>, result: <series> },
+    { label: "Q1-weekly", window: "last 16 weeks", granularity: "week",
+      run_query_body: <body used>, result: <series> }
+  ],
+  verdict_card: <full rendered card above>,
+  headline: <the HEADLINE line from the card>,
+  flags: {
+    daily:  { verdict, direction, level_delta, var_ratio, t_test_p, shape, change_point_date },
+    weekly: { verdict, direction, level_delta, var_ratio },
+    reconciled: <one-line reconciled verdict>,
+    contamination: { outlier_count, contaminated: bool }
+  }
+}
+```
+
+The skill-level flow (Step 2 in `SKILL.md`) then asks the user about the
+board and caches the payload for `metric-rca`. Do **not** ask the board
+question from inside this command — that lives at the skill level so a
+user running anomaly → drift back-to-back gets asked once at the end,
+not twice.
+
+---
+
+## Special cases
+
+**Funnel metrics:** Phase 1 and Phase 2 work as-is for multi-step funnels
+— the overall conversion series is what drifts. No special handling needed.
+
+**Retention metrics:** Retention is a rolling cohort metric — "drift" on a
+retention curve means cohort-over-cohort degradation. Replace the 60-day
+daily and 16-week weekly splits with a cohort-over-cohort comparison: last
+8 cohorts vs. prior 8 cohorts on the same retention day (D1, D7, D30). Flag
+which retention day shifted. Note in the verdict card: "Retention
+cohort-over-cohort comparison used in place of daily/weekly split."
+
+**Very low-volume metrics (<100 events/day):** The tests still apply but
+statistical confidence drops sharply. Downgrade confidence to `low` regardless
+of `level_delta` magnitude and note: "Low-volume metric — drift signal may be
+Poisson noise."
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| Exactly one of the two queries fails | Retry once. If still failing, mark that series partial, continue with the other, and note it in the output. |
+| Both queries fail | Stop. Report the failure and ask the user to verify project access. |
+| Project requires a filter the user didn't provide | Ask once, then proceed. Don't guess. |
+| Metric returns zero events in window | Stop. The metric is either broken or the filter excludes everything. Report as a possible data quality issue; do not proceed to Phase 2. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not detect point-in-time anomalies.** That's `metric-anomaly`.
+- **Does not attribute cause.** Root-cause investigation is handled by `metric-rca` after detection.
+- **Does not produce recommendations beyond "run anomaly first" / "run RCA".** The verdict is the product.
+
+Keep the surface narrow. A clean drift verdict in under 60 seconds is more
+useful than a sprawling analysis that tries to do everything.
diff --git a/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-rca.md b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-rca.md
new file mode 100644
index 0000000..ac2bf98
--- /dev/null
+++ b/plugins/mixpanel-mcp-eu/skills/monitor-metrics/commands/metric-rca.md
@@ -0,0 +1,484 @@
+# Command: metric-rca
+
+Root-cause investigation for a flagged metric. Takes the diagnosis payload
+from a prior `metric-anomaly` or `metric-drift` run and fans out across a
+set of segmentation branches to localise *where* the movement concentrated.
+Produces a ranked list of findings and appends them to the diagnosis board
+the user already created.
+
+This command does **not** re-run anomaly or drift detection. It assumes the
+movement has already been established — its job is attribution, not
+detection.
+
+---
+
+## Prerequisites
+
+Before this command runs, the session must hold a **diagnosis payload** in
+conversation memory from an earlier `metric-anomaly` or `metric-drift` run
+(see `SKILL.md` Step 2). The payload carries the project, metric, metric
+type, date ranges, flagged points or drift windows, and the query bodies
+used.
+
+If no payload exists, do **not** attempt to run RCA from a cold start. Tell
+the user: *"RCA runs on top of an existing anomaly or drift diagnosis. Run
+`metric-anomaly` or `metric-drift` first, then come back here."* Stop.
+
+### Board state
+
+If the user persisted the diagnosis as a Mixpanel board (Step 2 in
+`SKILL.md`), the payload will include `diagnosis_board_id`. This command
+**appends** to that board — it does not create a new one. If no board was
+created, skip the append step at the end and just return the findings
+inline; do not silently create a new board.
+
+### Ask once — business / market context
+
+Before firing Branch 5, ask the user exactly once:
+
+> *"What business or market is this metric tied to? (e.g., Indian
+> e-commerce, Indian OTT streaming, SEA fintech.) I'll use this to check
+> whether the flagged dates line up with festivals, launches, or
+> category-specific events."*
+
+Hold the answer as `business_context`. If the user skips or says "not
+relevant", skip Branch 5 entirely — do not guess the market from project
+name or memory.
+
+---
+
+## Phase 1 — Branch selection + parallel fan-out
+
+Read the payload and decide which branches to run. Every branch runs
+against the **same date ranges** the source command used:
+
+- `metric-anomaly` payload → use 7-day hourly + 30-day daily windows.
+- `metric-drift` payload → use 60-day daily + 16-week weekly windows, with
+  recent vs prior window comparison preserved.
+
+If both payloads exist in the session (user ran anomaly then drift),
+prefer the drift payload's date ranges — RCA over a longer window is more
+useful — and annotate findings with the anomaly payload's flagged
+timestamps for cross-reference.
+
+### Branch selection matrix
+
+| Branch | Purpose | Runs when |
+|---|---|---|
+| **Branch 1 — Component decomposition** | Break ratio/funnel/retention into its component events + metric-definition filters | `metric_type ∈ {ratio, funnel, retention}` |
+| **Branch 2 — Default-property breakdowns** | Source → geography → client-specific split | Always |
+| **Branch 3 — Distinct-ID outliers** | Find whether a small set of users drove the movement | Anomaly payload only. Skip if in-window distinct user count >10k |
+| **Branch 4 — Cohort comparison** | Run the metric filtered to the cohorts the user names to find concentration in named user segments | The user named one or more cohorts (or referenced a cohort in their ask) |
+| **Branch 5 — Calendar context** | Check whether flagged dates line up with festivals, launches, category events in `business_context` | `business_context` provided |
+
+Run all selected branches **in parallel** via concurrent `Run-Query` calls.
+Each branch can issue multiple queries; batch within a branch sequentially
+if one query's result informs the next (Branch 2's second level depends
+on the first).
+
+---
+
+## Branch 1 — Component decomposition
+
+Only runs for `ratio`, `funnel`, and `retention` metrics. The question:
+*is the movement in the numerator, the denominator, or a specific step?*
+
+**If the metric came from a saved Mixpanel Metric** (`metric_id` is set on
+the payload), read the component events, formula, and filters straight from
+the `Get-Metric` definition rather than re-deriving them — the definition is
+authoritative and avoids guessing the numerator/denominator. Fall back to
+the derivation below only when no saved-Metric definition is available.
+
+### For `ratio`
+1. Pull numerator event as a standalone count series (same window,
+   granularity, and filters from the metric definition).
+2. Pull denominator event as a standalone count series (same window,
+   granularity, and filters).
+3. Compare each component's deviation % against the ratio's overall
+   deviation %. Flag which component moved.
+4. If both components moved in the same direction by similar magnitude →
+   the ratio is stable but volumes shifted. Note as a volume story, not a
+   conversion story.
+5. If only one moved, or they moved in opposite directions → the ratio
+   shift is concentration-driven. Identify which component drove it.
+
+### For `funnel`
+1. Run the **same funnel definition** twice as `report_type=funnels` via
+   `Run-Query`: once for the recent (drift/anomaly) window, once for the
+   baseline window. The native funnels response returns step conversion
+   rates and absolute counts per step.
+2. For each step pair, compute the conversion-rate delta between recent
+   and baseline.
+3. Flag the **specific step pair** with the largest absolute conversion
+   drop. One step usually owns the drop; surface that pair as the
+   headline finding.
+4. If the funnel has step-level filters (e.g. property filters on
+   individual steps), do not decompose into standalone event counts —
+   the filters change the meaning. The native funnels query is the only
+   faithful comparison.
+
+This replaces the prior "pull each funnel step as a standalone event
+count" approach. Standalone event counts ignore step ordering and
+step-level filters; the native funnels report does not.
+
+### For `retention`
+1. Pull the cohort-defining event as a standalone count series.
+2. Pull the return event as a standalone count series.
+3. Check whether cohort size changed, return count changed, or both.
+4. A drop in retention with stable return count + larger cohort is a mix
+   effect; a drop in return count with stable cohort is real attrition.
+
+### Event × metric-definition filter combinations
+
+For every component event above, re-run it with **each filter from the
+metric definition applied independently** (i.e. one filter at a time, not
+all combinations — combinatorial blowup is not useful here). This shows
+whether a specific filter value concentrates the movement.
+
+Example: if the metric definition has `user_type = premium` baked in,
+and the numerator event is `video_play`, run:
+- `video_play` with no filter
+- `video_play` with `user_type = premium` (the baked filter) — this
+  should match the metric's numerator
+- `video_play` broken down **by** `user_type` (all values) — exposes
+  whether the movement is specific to `premium` or shared across the
+  population.
+
+Cap at 5 filter values per property breakdown; drop the long tail.
+
+---
+
+## Branch 2 — Default-property breakdowns
+
+Two-level cascade. Always runs.
+
+### Level 1 — Source segmentation
+
+Break down the metric by the SDK / ingestion source. Two properties
+together:
+
+- Event property `mp_lib` (string) — SDK name (e.g. `web`, `android`,
+  `iphone`, `swift`, `python`, `ruby`, `java`).
+- Event property `$import` (boolean) — true for events ingested via the
+  Import API, false for Track API.
+
+Output: a matrix of `mp_lib × $import` with deviation % per cell. The
+goal here is to isolate whether the movement is concentrated in
+client-side vs server-side vs Import API ingestion.
+
+### Level 2 — Conditional breakdowns
+
+The Level 2 slice depends on what Level 1 surfaced. Run the slice whose
+dominant source owns the movement; skip the others.
+
+**For client-side sources (`web`, `android`, `iphone`, `swift`, etc.):**
+Common first slice — platform, then geography:
+- Event property `$os`
+- Event property `platform` (or the project's equivalent; check the
+  metric definition or fall back to `mp_lib` if not present)
+- Event property `mp_country_code`
+- Event property `$region`
+- Event property `$city`
+
+Run `$os` and `platform` as single-property breakdowns. Run geography as
+a **step function**, not a cross-product: start with `mp_country_code`.
+If one country owns >50% of the movement, break it down by `$region`; if
+one region owns >50%, break by `$city`. Stop when concentration flattens.
+
+**For `web` specifically:**
+- Event property `$device`
+- Event property `utm_source`
+- Event property `$browser`
+
+**For `android` / `iphone` / `swift` / `ios`:**
+- Event property `$app_version_string`
+- Event property `$model`
+
+Run these as single-property breakdowns, not two-level (avoids the
+high-cardinality two-level truncation risk that bites large projects).
+
+### Cardinality discipline
+
+- Any breakdown returning exactly 1,000 / 3,000 / 10,000 rows is
+  potentially truncated — flag in findings, do not treat the result as
+  exhaustive.
+- If a two-level breakdown (`mp_lib × $import`) is used, keep the
+  first-level cardinality bounded: if `mp_lib` returns >20 distinct
+  values, filter to the top 10 by volume before running the second
+  level.
+
+---
+
+## Branch 3 — Distinct-ID outliers
+
+Only runs for anomaly payloads. Goal: is a small set of users
+responsible for the flagged point(s)?
+
+### Cardinality gate
+
+Before running, check in-window distinct user count against the metric's
+base query. If >10,000 distinct users contributed to the metric in the
+flagged window, skip this branch and note "Branch 3 skipped — user
+cardinality too high for outlier detection via MCP." A top-N breakdown
+on 100k users returns noise.
+
+### If within cardinality
+
+1. Break the metric down by `distinct_id` for the flagged window only
+   (not the whole series — this keeps the query tractable).
+2. Rank users by their contribution to the metric in the flagged window.
+3. Flag outliers: users whose contribution in the flagged window is
+   more than 5σ above the median user's contribution, OR users who
+   appear in the flagged window but not in the baseline window.
+4. Cap output at the top 20 distinct_ids by deviation.
+
+If the top 5 users account for >30% of the movement → strong user-driven
+outlier signal. Surface this prominently. Could be bots, internal test
+traffic, or a single high-volume customer.
+
+### Optional follow-up — session replay context
+
+If the top 3 distinct_ids each account for ≥10% of the movement individually,
+offer the user a follow-up: *"Top user(s) `<distinct_id>` drove [X]% of the
+flagged window. Want me to pull their session replays from that window so
+you can see what they did?"*
+
+If the user says yes, call `Get-User-Replays-Data` for each flagged
+distinct_id with `from_date` and `to_date` set to the flagged window. Cap at
+3 distinct_ids and 5 replays per user. Surface the replay URLs + timestamps
+in the findings card under the Branch 3 section.
+
+This is **opt-in only** — do not pull replays automatically. Replays add
+value when the customer wants the "what did they actually do" answer, but
+they're noisy if Session Replay isn't widely enabled in the project. Ask
+once, run if confirmed, skip if declined.
+
+---
+
+## Branch 4 — Cohort comparison
+
+Goal: is the movement concentrated in a specific user cohort the customer
+already cares about? Cohorts are typically the most CSA-actionable RCA
+signal — "your churn-risk cohort dropped 40%" is a far better headline than
+"users on iOS 17.4 dropped 40%."
+
+### Step 1 — Identify candidate cohorts
+
+The Mixpanel MCP surface has **no cohort-listing tool** — `Search-Entities`
+does not support a `cohort` entity type (its types are insights, funnels,
+flows, retention, dashboard, launch-analysis, experiments, feature-flags,
+metric-trees, playlists, heat-maps). Branch 4 therefore cannot auto-discover
+cohorts; source them from the user instead:
+
+1. If the user named cohorts in their original ask (e.g. "is this happening
+   in our power users?"), use those.
+2. Otherwise, ask once: *"Want me to compare against any saved cohorts? If
+   so, name them (or share their cohort IDs) and I'll filter the metric to
+   each."*
+
+If the user names no cohorts (or declines) → record *"Branch 4 skipped — no
+cohorts named; cohort auto-discovery isn't available on the MCP surface."*
+and continue.
+
+### Step 2 — Resolve the named cohorts
+
+Cap at the **top 5 cohorts** the user named. For each, resolve its
+`cohort_id` — the user may give a name or an id; if only a name is given,
+confirm it back before filtering. If the user named more than 5, ask which
+five matter most.
+
+Surface the cohort names in the findings — the customer recognizes their
+own cohort names and that's part of the value.
+
+### Step 3 — Run the metric filtered by each cohort
+
+For each selected cohort, run the same `query_template` as the headline
+metric, with one cohort-membership filter added. The exact filter shape
+comes from `Get-Query-Schema` — Mixpanel's query schema accepts cohort
+membership as a filter on `distinct_id` referencing the cohort_id.
+
+Run all cohort queries in parallel via concurrent `Run-Query` calls. Each
+query covers the same date window the source command used (drift window
+or anomaly window).
+
+### Step 4 — Score and rank
+
+For each cohort, compute the same concentration + deviation scores used
+in the Phase 2 ranking step (cohort_delta_abs / total_delta_abs and the
+cohort's own deviation %). Treat cohorts as candidate findings the same
+way property breakdowns are treated.
+
+A cohort is **important** if either:
+- It explains ≥30% of the headline movement (lower threshold than the
+  default 40% — cohorts are smaller slices than top-level properties,
+  and 30% concentration in a named cohort is a strong signal), OR
+- Its individual deviation is ≥1.5× the headline metric's deviation.
+
+### Error handling
+
+| Situation | Response |
+|---|---|
+| User names no cohorts | Skip branch, record reason. |
+| A cohort filter fails in `Run-Query` (cohort schema mismatch) | Retry once. If still failing, skip that cohort, continue others, note in branch coverage. |
+| All cohort queries fail | Skip branch, note "Branch 4 skipped — cohort filtering failed across all cohorts." |
+
+---
+
+## Branch 5 — Calendar context
+
+Only runs if the user provided `business_context`.
+
+1. Identify the key dates in the flagged window. For anomaly payloads,
+   use the timestamps from `payload.flags.hourly` and `payload.flags.daily`.
+   For drift payloads, use `payload.flags.daily.change_point_date` if
+   `payload.flags.daily.shape` is `step`, or the start of the drift window otherwise.
+2. Run a `web_search` with a query built from `business_context` + the
+   relevant date(s). Example: if `business_context = "Indian e-commerce"`
+   and the change-point is `2026-03-08`, search `"Indian e-commerce
+   events March 8 2026 festival sale"`. If `web_search` isn't available in
+   this runtime, skip Branch 5 and record *"Branch 5 skipped — web search
+   unavailable in this runtime"* (mirrors the no-`business_context` skip);
+   the other four branches still run.
+3. Look for matches: religious festivals, cricket fixtures, sale events
+   (BBD, EOSS, GOSF), product launches, regulatory dates (e.g. RBI policy
+   announcements).
+4. If a plausible match surfaces, include it in findings with a
+   confidence label: `strong` (exact date match, major event), `moderate`
+   (same week, category-aligned), `weak` (same month, tangential).
+5. If nothing surfaces, record: *"No calendar events found for
+   `<business_context>` on the flagged dates."*
+
+This branch is **context**, not **evidence**. Phrase findings as "the
+flagged date falls on [event]" — never as "the [event] caused the
+movement." Correlation only; causation belongs to the customer.
+
+---
+
+## Phase 2 — Synthesise, rank, visualise
+
+### Rank findings
+
+For every branch, each sub-segment (a `mp_lib` value, a country, a funnel
+step, a distinct_id, etc.) is a candidate finding. Score each:
+
+- **Concentration score** — share of the total movement this segment
+  explains. `segment_delta_abs / total_delta_abs`. A segment with 70%
+  concentration is worth surfacing; 5% is not.
+- **Deviation score** — this segment's deviation % compared to its own
+  baseline. A segment that individually deviated 40% is stronger signal
+  than one that deviated 5%.
+
+Flag a finding as **"important"** if **either** of these is true:
+- Concentration score ≥ 0.4 (one segment owns ≥40% of the movement; Branch 4 cohorts use the lower ≥ 0.3 threshold defined in Branch 4 Step 4), OR
+- Segment deviation ≥ 1.5× the headline metric's deviation (the movement
+  concentrates here).
+
+Cap total important findings at 6. If more than 6 qualify, keep the top 6
+by concentration × deviation combined rank.
+
+### Visualise important findings
+
+Render a single visualizer widget containing one chart per important
+finding, stacked vertically. Chart type by branch:
+
+| Branch | Chart |
+|---|---|
+| Branch 1 (component) | Two-line overlay: headline metric vs component metric, same window, same granularity |
+| Branch 2 (property breakdown) | Horizontal bar chart, one bar per segment, bar length = deviation %, color-coded by direction |
+| Branch 3 (distinct_id) | Horizontal bar chart, top-N users by contribution % in flagged window |
+| Branch 4 (cohort) | Horizontal bar chart, one bar per important cohort, bar length = deviation %, color-coded by direction |
+| Branch 5 (calendar) | No chart — rendered as an annotation in the written findings block |
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]`
+once if not already loaded this session. Do not narrate the read_me call.
+
+### The findings card
+
+```
+METRIC: <metric_name> — <project_id>
+DIAGNOSIS SOURCE: <metric-anomaly | metric-drift | both>
+WINDOW: <window described in the same language as the source verdict card>
+
+━━ HEADLINE ━━
+<one sentence naming the strongest finding, or "No single segment concentrates the movement — treat as distributed.">
+
+━━ IMPORTANT FINDINGS (ranked) ━━
+1. [Branch N] <segment description> — <concentration %> of movement,
+   <deviation %> vs baseline. <one-line interpretation>.
+2. ...
+(cap 6; omit section if no important findings)
+
+━━ BRANCH COVERAGE ━━
+Branch 1 (component):        <ran | skipped — reason>
+Branch 2 (default props):    <ran | skipped — reason>
+Branch 3 (distinct_id):      <ran | skipped — reason>
+Branch 4 (cohort):           <ran + N cohorts compared | skipped — reason>
+Branch 5 (calendar):         <ran + N events found | skipped — reason>
+
+━━ WHAT THIS ISN'T ━━
+This is attribution by segmentation, not causal analysis. Findings show
+where the movement concentrated; they do not prove what caused it.
+Calendar matches are correlation only.
+```
+
+### The RCA payload (passed back to SKILL.md)
+
+After rendering the findings card + charts, hand back to the skill-level
+flow:
+
+```
+{
+  command: "metric-rca",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  source_payload_command: "metric-anomaly" | "metric-drift",
+  business_context: <string or null>,
+  rca_queries: [
+    { branch: int, label: str, run_query_body: dict, result: dict }, ...
+  ],
+  important_findings: [
+    { branch: int, segment: str, concentration_pct: float,
+      deviation_pct: float, interpretation: str,
+      chart_spec: dict },
+    ... (cap 6)
+  ],
+  findings_card: <full rendered card above>,
+  headline: <the HEADLINE line>,
+  diagnosis_board_id: <from source payload, or null>
+}
+```
+
+The skill-level flow (Step 3 in `SKILL.md`, added with this command)
+handles the board append.
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| No diagnosis payload in session | Stop. Tell user to run `metric-anomaly` or `metric-drift` first. |
+| A branch query fails | Retry once. If still failing, mark that branch partial, continue others, note in branch coverage. |
+| All branches fail | Stop. Report failure and ask the user to verify project access. |
+| Branch 2 Level 1 returns only one `mp_lib × $import` cell with meaningful volume | Skip Branch 2 Level 2 conditional logic; run the fallback geography step function directly. |
+| User declines to provide `business_context` | Skip Branch 5 entirely, proceed with others. |
+| `web_search` unavailable in this runtime | Skip Branch 5, record "Branch 5 skipped — web search unavailable." Other branches continue. |
+| No important findings after ranking (all segments <40% concentration and <1.5× deviation) | Surface that finding: "Movement is distributed across segments — no single dimension concentrates it." This is a valid, useful result. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not re-run anomaly or drift detection.** It consumes the payload.
+- **Does not claim causation.** Correlation by segmentation is the ceiling.
+- **Does not cross-join properties combinatorially.** Branch 2 is a
+  step-function cascade, not a cross-product, because high-cardinality
+  two-level breakdowns truncate silently.
+- **Does not source calendar dates from memory.** Always `web_search`
+  with the user-provided `business_context` (skips gracefully if web search
+  is unavailable).
+- **Does not create a new board.** Appends to the existing diagnosis
+  board via the skill-level flow.
+
+Keep the surface narrow. A ranked list of 3-6 concentrated segments with
+charts beats a 40-branch exhaustive report every time.
diff --git a/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000700000001 b/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000700000001
new file mode 100644
index 0000000..11a1684
--- /dev/null
+++ b/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000700000001
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires Mixpanel MCP.
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the Mixpanel MCP. Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both detection commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `Mixpanel MCP:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `Mixpanel MCP:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `Mixpanel MCP:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `Mixpanel MCP:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both detection commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `Mixpanel MCP:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once, scoped to the events used by `query_template`
+(`event_name=<event>` for each), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift, to cover its
+weekly view; 30 days back for anomaly). If issues exist (type drift, null
+spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly` and `metric-drift`) produce a
+structured verdict, not a data dump. Each defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both detection commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (the anomaly run was skipped and handed off to `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. Do not use newline characters
+     for line breaks; each HTML element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000800000002 b/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000800000002
new file mode 100644
index 0000000..954c4fe
--- /dev/null
+++ b/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000800000002
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires Mixpanel MCP.
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the Mixpanel MCP. Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both detection commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp-in:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp-in:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp-in:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp-in:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both detection commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp-in:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` a single time for each event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift, since its weekly
+view reaches past the 60-day daily view; 30 days back for anomaly). If issues exist (type drift, null spikes, schema changes) in
+that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both commands produce a structured verdict, not a data dump. The commands
+define their own output formats; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (the anomaly run was skipped in favor of `metric-drift`, so there is nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. Do not embed literal
+     newline characters; each HTML element renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000900000003 b/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000900000003
new file mode 100644
index 0000000..5b317fe
--- /dev/null
+++ b/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000900000003
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp-in` connector (Mixpanel India).
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp-in` connector (Mixpanel India). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp-in:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp-in:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp-in:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp-in:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp-in:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` a single time for each event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift, since its weekly
+view reaches past the 60-day daily view; 30 days back for anomaly). If issues exist (type drift, null spikes, schema changes) in
+that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both commands produce a structured verdict, not a data dump. The commands
+define their own output formats; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (the anomaly run was skipped in favor of `metric-drift`, so there is nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. Do not embed literal
+     newline characters; each HTML element renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000a00000004 b/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000a00000004
new file mode 100644
index 0000000..831ac65
--- /dev/null
+++ b/plugins/mixpanel-mcp-in/skills/monitor-metrics/.fuse_hidden0000000a00000004
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp-in` connector (Mixpanel India).
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp-in` connector (Mixpanel India). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries over shorter windows)
+  and catches point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp-in:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp-in:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp-in:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp-in:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp-in:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once, scoped to the events used by `query_template`
+(`event_name=<event>` for each), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift, 30 days back for
+anomaly). If issues exist (type drift, null spikes, schema changes) in
+that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly` and `metric-drift`) produce a structured
+verdict, not a data dump. The commands define their own output formats; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both detection commands (`metric-anomaly` and `metric-drift`) return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (the anomaly run was skipped in favor of `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — no raw newline characters;
+     each element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-in/skills/monitor-metrics/SKILL.md b/plugins/mixpanel-mcp-in/skills/monitor-metrics/SKILL.md
new file mode 100644
index 0000000..a2f3849
--- /dev/null
+++ b/plugins/mixpanel-mcp-in/skills/monitor-metrics/SKILL.md
@@ -0,0 +1,462 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp-in` connector (Mixpanel India).
+---
+
+# Monitor Metrics
+
+> **Connector:** This skill operates exclusively against the `mixpanel-mcp-in` connector (Mixpanel India region). Every Mixpanel MCP tool call in this SKILL.md and in every file under `commands/` must be routed through `mixpanel-mcp-in` — never any other Mixpanel connector.
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp-in` connector (Mixpanel India). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries over shorter windows)
+  and catches point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp-in:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp-in:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp-in:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp-in:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp-in:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `List-Properties` with
+   `names=[<filter_property>]` and `resource_type=<Event|User>` (pass
+   `events=[<event>]` to scope to a specific event's properties). If it
+   doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift, 30 days back for
+anomaly). If issues exist (type drift, null spikes, schema changes) in
+that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both commands produce a structured verdict, not a data dump. The commands
+define their own output formats; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (anomaly detection is skipped and the user is redirected to `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — do not include literal newline
+     characters in the HTML; each block element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-anomaly.md b/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-anomaly.md
new file mode 100644
index 0000000..25530aa
--- /dev/null
+++ b/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-anomaly.md
@@ -0,0 +1,236 @@
+# Command: metric-anomaly
+
+Detect point-in-time anomalies in a single metric — recent spikes, drops, and
+clusters. Produces a verdict on *whether* something unusual happened at a
+specific moment. Does **not** test for trend-level drift (run `metric-drift`
+for that).
+
+---
+
+## Prerequisites
+
+Before this command runs, Steps 0, 1, and 1.5 from `SKILL.md` must have
+completed — input validation, normalized metric series object, and project
+profile resolution. If any of those haven't happened, do them first.
+
+If the user's input is a saved report but the metric is a **funnel** or
+**retention** report, see the "Special cases" section at the bottom.
+
+### Prerequisite — classify `metric_type`
+
+Before firing any queries, classify the metric into one of:
+`count`, `unique_count`, `ratio`, `funnel`, `retention`, `unknown`.
+
+| Detected | Classification |
+|---|---|
+| Report type `funnels` | `funnel` |
+| Report type `retention` | `retention` |
+| Query template has A/B form or `% of total` (conversion rate, session rate, etc.) | `ratio` |
+| Single-series count (event count, event count distinct users) | `count` |
+| Single-series unique count | `unique_count` |
+| Formula metric / custom SQL / anything else | `unknown` |
+
+Store as `metric_type` on the metric series object. Used in the verdict card
+and in special-case routing (funnel, retention).
+
+> _Keep this classification table in sync with the identical block in
+> `metric-drift.md` — edits to one must be mirrored in the other._
+
+---
+
+## Phase 1 — Fetch series (2 queries, parallel)
+
+Fire both `Run-Query` calls simultaneously:
+
+| Query | Window | Granularity | Purpose |
+|---|---|---|---|
+| Q1-hourly | Last 7 days | `hour` | Recent-blip detection |
+| Q1-daily | Last 30 days | `day` | Recent-day detection against a fuller baseline |
+
+Use the `query_template` from the metric object; override only `dateRange`
+and `unit` (granularity). Do not re-apply filters — they're already baked in.
+
+Build the `Run-Query` body from `query_template` with only `dateRange` and
+`unit` (granularity) overridden. Use `timeComparison` when a single call can
+cover both windows.
+
+---
+
+## Phase 2 — Outlier tests (Z-score + IQR, time-bucketed)
+
+For each series independently, compute the expected range at every timestamp.
+Run **both** tests; flag a point if **either** test flags it. Report which
+test(s) caught each flag.
+
+### Test 1 — Z-score against time-bucketed mean
+
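+- For the **hourly** series: group all points by hour-of-day (0–23; 24 buckets of ~7 points each — a 7 × 24 = 168-bucket hour-of-day × day-of-week grid would leave only one point per bucket in a 7-day window, so σ would be undefined). For each point, compute mean (μ) and stddev (σ) from the *other* points in its bucket (leave-one-out). Flag any point where `|value - μ| / σ > 2.5`.
+- For the **daily** series: group by day-of-week (7 buckets of 4–5 points each across the 30-day window). Compute μ and σ leave-one-out as above — if a point is included in its own bucket statistics, its |z| cannot exceed (n − 1)/√n, which is below 2.5 for such small buckets. Flag any point where `|value - μ| / σ > 2.5`.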
+- Handle low-variance buckets: if σ is <5% of μ, skip the Z-score for that bucket and fall back to IQR only (division by tiny σ creates false alarms).
+
+### Test 2 — IQR against time-bucketed median
+
+- Same bucketing scheme as Test 1.
+- For each bucket, compute Q1, median, Q3, and IQR = Q3 − Q1.
+- Flag any point where `value < Q1 − 1.5 × IQR` or `value > Q3 + 1.5 × IQR`.
+
+### Deviation magnitude
+
+For every flagged point, report `(value − median) / median` as a signed
+percentage. This is what the CSA actually cares about, not the Z-score itself.
+
+### Classify each flagged timestamp
+
+- **Isolated spike/drop** — one point flagged, neighbors normal. Most likely a real anomaly (outage, release, data gap).
+- **Cluster** — 2+ consecutive points flagged in the same direction. Could be a short incident *or* the leading edge of drift. Flag as ambiguous and note that `metric-drift` may be a better follow-up.
+- **Edge-of-window cluster** — flagged points are the most recent N points. Strongly suggestive of drift, not anomaly. Recommend running `metric-drift` before treating as an anomaly incident.
+
+---
+
+## Phase 3 — Summarise + charts + handoff
+
+Produces **three things**, in order:
+
+1. **A single visualizer widget with two charts stacked vertically**
+2. **A compact verdict card**
+3. **A diagnosis payload** handed back to the skill-level flow (Step 2 in
+   `SKILL.md`) for the board prompt and `metric-rca` caching
+
+### The charts — always rendered
+
+Both charts render regardless of whether anything was flagged. A stable chart
+is the visual proof of stability and saves the CSA from second-guessing.
+
+**Top chart: 7-day hourly view** (Q1-hourly series)
+- Line for the hourly series.
+- Dots for every flagged hourly point — red for drops, amber for spikes. Omit entirely if no flags.
+- Label the most recent flagged point inline with timestamp and deviation %.
+- Title: `<metric_name> — last 7 days, hourly`.
+
+**Bottom chart: 30-day daily view** (Q1-daily series)
+- Line for the daily series.
+- Dots for every flagged daily point — red for drops, amber for spikes. Omit entirely if no flags.
+- Label the most recent flagged point inline with timestamp and deviation %.
+- Title: `<metric_name> — last 30 days, daily`.
+
+Both charts share x-axis type (date/time) but not range — render as two
+separate plots in one widget, stacked, with consistent y-axis formatting.
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]` once if
+not already loaded this session. Do not narrate the read_me call to the user.
+
+If chart generation fails, fall back to card-only output with the note
+"Chart unavailable — card below." Do not block on the chart.
+
+### The compact verdict card
+
+```
+METRIC: <metric_name> — <project_id>
+DEFINITION: <one-sentence what-it-measures>
+
+━━ ANOMALY VERDICT ━━
+Hourly series (7d):  <Clean | N flagged | Edge cluster — possible drift>
+Daily series (30d):  <Clean | N flagged | Edge cluster — possible drift>
+
+━━ TOP FLAGS ━━
+<timestamp>  <value>  <deviation %>  [isolated | cluster | edge]  (z-score | IQR | both)
+<timestamp>  <value>  <deviation %>  [isolated | cluster | edge]  (z-score | IQR | both)
+... (cap 5; omit section entirely if no flags)
+
+━━ HEADLINE ━━
+<one sentence the CSA could paste into a customer Slack>
+
+━━ CONFIDENCE ━━
+<high | medium | low> — <reason for any hedge>
+
+━━ NEXT STEP ━━
+<one concrete action>
+
+━━ WHAT THIS ISN'T ━━
+This is point-in-time anomaly detection only. Trend-level drift is not
+tested here — run `metric-drift` for that.
+```
+
+#### Headline phrasing discipline
+
+- No flags: "Metric is stable at the point-in-time level — no anomalies in the last 7 or 30 days."
+- Isolated flag(s): "Metric had a [spike/drop] of X% on [date]. Baseline otherwise stable."
+- Cluster or edge cluster: "Metric has [N] anomalies concentrated in the last [window] — likely the leading edge of drift. Recommend running `metric-drift` next."
+
+Never lead with a confidence hedge. State the finding, then qualify it.
+
+If >10 flags total across both series, cap the TOP FLAGS list at 5 entries
+sorted by absolute deviation magnitude descending and add a note to the headline:
+"<N> anomalies flagged across the 7-day and 30-day views — the metric is either undergoing a
+regime shift or the baseline model is wrong. Run `metric-drift` before
+treating any single point as actionable."
+
+### The diagnosis payload
+
+After rendering the charts and verdict card, assemble the payload defined
+in `SKILL.md` Step 2 and hand it back to the skill-level flow:
+
+```
+{
+  command: "metric-anomaly",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  queries: [
+    { label: "Q1-hourly", window: "last 7 days", granularity: "hour",
+      run_query_body: <body used>, result: <series> },
+    { label: "Q1-daily",  window: "last 30 days", granularity: "day",
+      run_query_body: <body used>, result: <series> }
+  ],
+  verdict_card: <full rendered card above>,
+  headline: <the HEADLINE line from the card>,
+  flags: {
+    hourly: [ { timestamp, value, deviation_pct, classification, test } , ... ],
+    daily:  [ { timestamp, value, deviation_pct, classification, test } , ... ]
+  }
+}
+```
+
+The skill-level flow (Step 2 in `SKILL.md`) then asks the user about the
+board and caches the payload for `metric-rca`. Do **not** ask the board
+question from inside this command — that lives at the skill level so a
+user running anomaly → drift back-to-back gets asked once at the end,
+not twice.
+
+---
+
+## Special cases
+
+**Funnel metrics:** The hourly view is usually too noisy for a multi-step
+funnel at low volume. Drop Q1-hourly and run Q1-daily only (last 14 days
+instead of 30 to stay lightweight). Note in output: "Hourly anomaly detection
+skipped — funnel volume too low at hourly granularity."
+
+**Retention metrics:** Retention is a rolling cohort metric — point-in-time
+anomaly detection mostly doesn't apply. Tell the user directly and recommend
+`metric-drift` instead, which has a cohort-over-cohort fallback for retention.
+
+**Very low-volume metrics (<100 events/day):** Skip Q1-hourly and run
+Q1-daily only — the Poisson noise floor dominates at hourly granularity.
+State this in the output.
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| Either query fails | Retry once. If still failing, mark that series partial, continue with the other, and note it in the output. |
+| Both queries fail | Stop. Report the failure and ask the user to verify project access. |
+| Project requires a filter the user didn't provide | Ask once, then proceed. Don't guess. |
+| Metric returns zero events in window | Stop. The metric is either broken or the filter excludes everything. Report as a possible data quality issue; do not proceed to Phase 2. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not test for trend-level drift.** That's `metric-drift`.
+- **Does not attribute cause.** Root-cause investigation is out of scope for this command — run `metric-rca` after detection.
+- **Does not produce recommendations beyond "run drift" / "run RCA".** The verdict is the product.
+
+Keep the surface narrow. A clean anomaly verdict in under 30 seconds is more
+useful than a sprawling analysis that tries to do everything.
diff --git a/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-drift.md b/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-drift.md
new file mode 100644
index 0000000..12e9456
--- /dev/null
+++ b/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-drift.md
@@ -0,0 +1,319 @@
+# Command: metric-drift
+
+Detect trend-level drift in a single metric — whether the baseline itself has
+shifted over recent weeks. Produces a verdict on *whether* the metric is in a
+new regime. Does **not** test for point-in-time anomalies (run `metric-anomaly`
+for that).
+
+---
+
+## Prerequisites
+
+Before this command runs, Steps 0, 1, and 1.5 from `SKILL.md` must have
+completed — input validation, normalized metric series object, and project
+profile resolution. If any of those haven't happened, do them first.
+
+If the user's input is a saved report but the metric is a **funnel** or
+**retention** report, see the "Special cases" section at the bottom.
+
+### Prerequisite — classify `metric_type`
+
+Before firing any queries, classify the metric into one of:
+`count`, `unique_count`, `ratio`, `funnel`, `retention`, `unknown`.
+
+| Detected | Classification |
+|---|---|
+| Report type `funnels` | `funnel` |
+| Report type `retention` | `retention` |
+| Query template has A/B form or `% of total` (conversion rate, session rate, etc.) | `ratio` |
+| Single-series count (event count, event count distinct users) | `count` |
+| Single-series unique count | `unique_count` |
+| Formula metric / custom SQL / anything else | `unknown` |
+
+Store as `metric_type` on the metric series object. Used in the verdict card
+and in special-case routing (funnel, retention).
+
+> _Keep this classification table in sync with the identical block in
+> `metric-anomaly.md` — edits to one must be mirrored in the other._
+
+### Prerequisite — name the drift and baseline windows
+
+The naming convention used throughout this command's output:
+
+- **`drift_window`** — the **recent** 30 days (most recent 30 days ending today).
+- **`baseline_window`** — the **prior** 30 days (30 days ending 30 days before today).
+
+Both windows are computed from Q1-daily. The weekly test uses 8 vs 8 weeks —
+those windows are reported alongside but are secondary to the daily windows
+for headline purposes.
+
+---
+
+## Phase 1 — Fetch series (2 queries, parallel)
+
+Fire both `Run-Query` calls simultaneously:
+
+| Query | Window | Granularity | Comparison |
+|---|---|---|---|
+| Q1-daily | Last 60 days | `day` | Last 30 days vs. prior 30 days |
+| Q1-weekly | Last 16 weeks | `week` | Last 8 weeks vs. prior 8 weeks |
+
+The 60-day daily view catches medium-term drift. The 16-week weekly view
+catches slow drift that the daily window would miss because daily noise
+drowns the signal. Running both is cheap and they answer different questions.
+
+Use the `query_template` from the metric object; override only `dateRange`
+and `unit` (granularity). Do not re-apply filters — they're already baked in.
+
+---
+
+## Phase 2 — Drift tests (mean shift + variance ratio)
+
+### Window split & contamination check
+
+For each series, split into `recent` and `prior` halves (no overlap).
+
+**Lightweight anomaly contamination check** (important because this command
+can run standalone without `metric-anomaly` having run first):
+
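+Scan the `recent` window for obvious outliers using a simple robust rule — any point
+more than 3 scaled MADs (1.4826 × median absolute deviation) from the window median. (A 3σ-from-mean
+rule cannot work here: by Chebyshev's inequality at most ~11% of points can sit beyond 3σ, so a 20% threshold would never fire.)
+If ≥20% of points in the `recent` window qualify → flag **"drift test potentially contaminated by outliers in the
+recent window"** and mark all drift findings as low-confidence. Recommend the user run `metric-anomaly` first.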
+
+If fewer than 20% of points qualify, proceed normally but note the count in the
+verdict card's contamination section.
+
+This is deliberately lighter than `metric-anomaly`'s full time-bucketed
+test — its job here is only to flag contamination risk, not to produce a
+publishable anomaly verdict.
+
+### Test 1 — Mean shift (level drift)
+
+```
+mean_recent  = mean(recent_window)
+mean_prior   = mean(prior_window)
+level_delta  = (mean_recent − mean_prior) / mean_prior    # signed %
+```
+
+Flag thresholds:
+- `|level_delta| < 5%` → no meaningful shift
+- `5% ≤ |level_delta| < 15%` → moderate drift
+- `|level_delta| ≥ 15%` → significant drift
+
+Additionally compute a Welch's t-test on the two windows. If p < 0.05 and
+`|level_delta| ≥ 5%`, drift is statistically supported. If p ≥ 0.05, note the
+shift is observational but not statistically distinguishable from noise.
+
+### Test 2 — Variance ratio (volatility drift)
+
+```
+var_ratio = variance(recent_window) / variance(prior_window)
+```
+
+Flag thresholds:
+- `0.67 ≤ var_ratio ≤ 1.5` → variance stable
+- `var_ratio > 1.5` → metric got noisier (investigate instrumentation, cohort mix)
+- `var_ratio < 0.67` → metric got smoother (often a sign of flatlining or saturation)
+
+Variance drift without level drift is an under-appreciated signal — the
+headline number looks fine but something structural changed. Always surface
+it separately.
+
+Distribution-shape tests (KS, PSI) are intentionally **not** part of this
+battery. They require per-user or per-segment values, which Mixpanel's MCP
+surface does not return at practical cost.
+
+### Combine into a per-series verdict
+
+| Verdict | When |
+|---|---|
+| **No drift** | Level stable AND variance stable |
+| **Level drift** | Level shifted ≥5%, variance stable |
+| **Variance drift** | Level stable, variance ratio outside 0.67–1.5 |
+| **Compound drift** | Both |
+
+Also report **direction** (up / down) and **magnitude** (% for level, ratio
+for variance).
+
+### Reconcile the two series
+
+The 60-day-daily and 16-week-weekly views should agree on direction. If they
+disagree:
+
+- **Weekly says drift, daily says none** → slow drift that daily noise hides. Trust the weekly.
+- **Daily says drift, weekly says none** → recent movement that hasn't accumulated into the weekly window yet. Could be the leading edge of real drift, or a contained incident. Trust the daily but note the weekly hasn't confirmed.
+- **Both agree** → high confidence, state it.
+
+### Classify drift shape
+
+If drift is flagged, classify its shape using the daily series for use in
+the verdict card:
+
+| Condition | `verdict_shape` value |
+|---|---|
+| Single-day change point where mean shift before vs after explains ≥60% of variance, and before/after segments are each <20% within-segment variance | `step` (record the change-point date) |
+| Linear regression fit to the full 60-day series has R² ≥ 0.5 and non-zero slope | `slope` |
+| 7-day autocorrelation on residuals ≥ 0.5, and periodicity strength differs between drift and baseline windows | `oscillating` |
+| None of the above fit cleanly | `unclassified` |
+
+**Shape precedence**: if multiple shapes fit, use this priority:
+`step` > `slope` > `oscillating` > `unclassified`. (Step changes are the
+most actionable; surface them first when ambiguous.)
+
+If no drift was flagged, skip shape classification entirely.
+
+---
+
+## Phase 3 — Summarise + charts + handoff
+
+Produces **three things**, in order:
+
+1. **A single visualizer widget with two charts stacked vertically**
+2. **A compact verdict card**
+3. **A diagnosis payload** handed back to the skill-level flow (Step 2 in
+   `SKILL.md`) for the board prompt and `metric-rca` caching
+
+### The charts — always rendered
+
+Both charts render regardless of whether drift was detected. A stable chart
+is the visual proof of stability.
+
+**Top chart: 60-day daily view** (Q1-daily series)
+- Line for the daily series.
+- **Shaded band** for the prior 30-day baseline window (subtle grey fill).
+- **Shaded band** for the recent 30-day drift window — red-tinted fill if drift is `down`, green-tinted if `up`, amber-tinted if `mixed`, grey if no drift.
+- Horizontal line for `mean_prior` (dashed grey).
+- Horizontal line for `mean_recent` (dashed, colored to match drift direction).
+- If `verdict_shape = step`, annotate the change-point date with a vertical dashed line.
+- Title: `<metric_name> — last 60 days, daily`.
+
+**Bottom chart: 16-week weekly view** (Q1-weekly series)
+- Line for the weekly series.
+- **Shaded band** for the prior 8-week baseline window (subtle grey fill).
+- **Shaded band** for the recent 8-week drift window — same direction-based coloring as above.
+- Horizontal lines for `mean_prior_weekly` (dashed grey) and `mean_recent_weekly` (dashed, colored).
+- Title: `<metric_name> — last 16 weeks, weekly`.
+
+Both charts share x-axis type (date) and consistent y-axis formatting.
+Render as two separate plots in one widget, stacked.
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]` once if
+not already loaded this session. Do not narrate the read_me call to the user.
+
+If chart generation fails, fall back to card-only output with the note
+"Chart unavailable — card below." Do not block on the chart.
+
+### The compact verdict card
+
+```
+METRIC: <metric_name> — <project_id>
+DEFINITION: <one-sentence what-it-measures>
+
+━━ DRIFT VERDICT ━━
+60-day / daily view:   <verdict>  <direction>  <magnitude>  (t-test p = <p>)
+16-week / weekly view: <verdict>  <direction>  <magnitude>
+Reconciled verdict:    <one sentence>
+Shape:                 <step | slope | oscillating | unclassified>  <change-point date if step>
+
+━━ CONTAMINATION ━━
+<none | recent window contains N outliers — drift confidence downgraded; recommend metric-anomaly first>
+
+━━ HEADLINE ━━
+<one sentence the CSA could paste into a customer Slack>
+
+━━ CONFIDENCE ━━
+<high | medium | low> — <reason for any hedge>
+
+━━ NEXT STEP ━━
+<one concrete action>
+
+━━ WHAT THIS ISN'T ━━
+This is trend-level drift detection only. Point-in-time anomalies are not
+tested here — run `metric-anomaly` for that.
+```
+
+#### Headline phrasing discipline
+
+- No drift: "Metric is stable — trend has not shifted in the last 30 days or 8 weeks."
+- Level drift: "Metric has drifted [up/down] by X% over the last 30 days. [Weekly view confirms / Weekly view hasn't confirmed yet]."
+- Variance drift only: "Metric level is stable but volatility has [increased/decreased] — variance ratio [X.XX]. Something structural changed without moving the headline."
+- Compound drift: "Metric has drifted [up/down] by X% AND volatility changed. Compound drift — investigate both level and structure."
+- Contamination flag: append "Drift confidence is low — recent window has N outlier points. Run `metric-anomaly` first to clean up before attributing."
+
+Never lead with a confidence hedge. State the finding, then qualify it.
+
+### The diagnosis payload
+
+After rendering the charts and verdict card, assemble the payload defined
+in `SKILL.md` Step 2 and hand it back to the skill-level flow:
+
+```
+{
+  command: "metric-drift",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  queries: [
+    { label: "Q1-daily",  window: "last 60 days",  granularity: "day",
+      run_query_body: <body used>, result: <series> },
+    { label: "Q1-weekly", window: "last 16 weeks", granularity: "week",
+      run_query_body: <body used>, result: <series> }
+  ],
+  verdict_card: <full rendered card above>,
+  headline: <the HEADLINE line from the card>,
+  flags: {
+    daily:  { verdict, direction, level_delta, var_ratio, t_test_p, shape, change_point_date },
+    weekly: { verdict, direction, level_delta, var_ratio },
+    reconciled: <one-line reconciled verdict>,
+    contamination: { outlier_count, contaminated: bool }
+  }
+}
+```
+
+The skill-level flow (Step 2 in `SKILL.md`) then asks the user about the
+board and caches the payload for `metric-rca`. Do **not** ask the board
+question from inside this command — that lives at the skill level so a
+user running anomaly → drift back-to-back gets asked once at the end,
+not twice.
+
+---
+
+## Special cases
+
+**Funnel metrics:** Phase 1 and Phase 2 work as-is for multi-step funnels
+— the overall conversion series is what drifts. No special handling needed.
+
+**Retention metrics:** Retention is a rolling cohort metric — "drift" on a
+retention curve means cohort-over-cohort degradation. Replace the 60-day
+daily and 16-week weekly splits with a cohort-over-cohort comparison: last
+8 cohorts vs. prior 8 cohorts on the same retention day (D1, D7, D30). Flag
+which retention day shifted. Note in the verdict card: "Retention
+cohort-over-cohort comparison used in place of daily/weekly split."
+
+**Very low-volume metrics (<100 events/day):** The tests still apply but
+statistical confidence drops sharply. Downgrade confidence to `low` regardless
+of `level_delta` magnitude and note: "Low-volume metric — drift signal may be
+Poisson noise."
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| One of the two queries fails | Retry once. If still failing, mark that series partial, continue with the other, and note it in the output. |
+| Both queries fail | Stop. Report the failure and ask the user to verify project access. |
+| Project requires a filter the user didn't provide | Ask once, then proceed. Don't guess. |
+| Metric returns zero events in window | Stop. The metric is either broken or the filter excludes everything. Report as a possible data quality issue; do not proceed to Phase 2. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not detect point-in-time anomalies.** That's `metric-anomaly`.
+- **Does not attribute cause.** Root-cause investigation is handled by `metric-rca` after detection.
+- **Does not produce recommendations beyond "run anomaly first" / "run RCA".** The verdict is the product.
+
+Keep the surface narrow. A clean drift verdict in under 60 seconds is more
+useful than a sprawling analysis that tries to do everything.
diff --git a/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-rca.md b/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-rca.md
new file mode 100644
index 0000000..ac2bf98
--- /dev/null
+++ b/plugins/mixpanel-mcp-in/skills/monitor-metrics/commands/metric-rca.md
@@ -0,0 +1,484 @@
+# Command: metric-rca
+
+Root-cause investigation for a flagged metric. Takes the diagnosis payload
+from a prior `metric-anomaly` or `metric-drift` run and fans out across a
+set of segmentation branches to localise *where* the movement concentrated.
+Produces a ranked list of findings and appends them to the diagnosis board
+the user already created.
+
+This command does **not** re-run anomaly or drift detection. It assumes the
+movement has already been established — its job is attribution, not
+detection.
+
+---
+
+## Prerequisites
+
+Before this command runs, the session must hold a **diagnosis payload** in
+conversation memory from an earlier `metric-anomaly` or `metric-drift` run
+(see `SKILL.md` Step 2). The payload carries the project, metric, metric
+type, date ranges, flagged points or drift windows, and the query bodies
+used.
+
+If no payload exists, do **not** attempt to run RCA from a cold start. Tell
+the user: *"RCA runs on top of an existing anomaly or drift diagnosis. Run
+`metric-anomaly` or `metric-drift` first, then come back here."* Stop.
+
+### Board state
+
+If the user persisted the diagnosis as a Mixpanel board (Step 2 in
+`SKILL.md`), the payload will include `diagnosis_board_id`. This command
+**appends** to that board — it does not create a new one. If no board was
+created, skip the append step at the end and just return the findings
+inline; do not silently create a new board.
+
+### Ask once — business / market context
+
+Before firing Branch 5, ask the user exactly once:
+
+> *"What business or market is this metric tied to? (e.g., Indian
+> e-commerce, Indian OTT streaming, SEA fintech.) I'll use this to check
+> whether the flagged dates line up with festivals, launches, or
+> category-specific events."*
+
+Hold the answer as `business_context`. If the user skips or says "not
+relevant", skip Branch 5 entirely — do not guess the market from project
+name or memory.
+
+---
+
+## Phase 1 — Branch selection + parallel fan-out
+
+Read the payload and decide which branches to run. Every branch runs
+against the **same date ranges** the source command used:
+
+- `metric-anomaly` payload → use 7-day hourly + 30-day daily windows.
+- `metric-drift` payload → use 60-day daily + 16-week weekly windows, with
+  recent vs prior window comparison preserved.
+
+If both payloads exist in the session (user ran anomaly then drift),
+prefer the drift payload's date ranges — RCA over a longer window is more
+useful — and annotate findings with the anomaly payload's flagged
+timestamps for cross-reference.
+
+### Branch selection matrix
+
+| Branch | Purpose | Runs when |
+|---|---|---|
+| **Branch 1 — Component decomposition** | Break ratio/funnel/retention into its component events + metric-definition filters | `metric_type ∈ {ratio, funnel, retention}` |
+| **Branch 2 — Default-property breakdowns** | Source → geography → client-specific split | Always |
+| **Branch 3 — Distinct-ID outliers** | Find whether a small set of users drove the movement | Anomaly payload only. Skip if in-window distinct user count >10k |
+| **Branch 4 — Cohort comparison** | Run the metric filtered to the cohorts the user names to find concentration in named user segments | The user named one or more cohorts (or referenced a cohort in their ask) |
+| **Branch 5 — Calendar context** | Check whether flagged dates line up with festivals, launches, category events in `business_context` | `business_context` provided |
+
+Run all selected branches **in parallel** via concurrent `Run-Query` calls.
+Each branch can issue multiple queries; within a branch, run queries
+sequentially when one query's result informs the next (Branch 2's second
+level depends on the first).
+
+---
+
+## Branch 1 — Component decomposition
+
+Only runs for `ratio`, `funnel`, and `retention` metrics. The question:
+*is the movement in the numerator, the denominator, or a specific step?*
+
+**If the metric came from a saved Mixpanel Metric** (`metric_id` is set on
+the payload), read the component events, formula, and filters straight from
+the `Get-Metric` definition rather than re-deriving them — the definition is
+authoritative and avoids guessing the numerator/denominator. Fall back to
+the derivation below only when no saved-Metric definition is available.
+
+### For `ratio`
+1. Pull numerator event as a standalone count series (same window,
+   granularity, and filters from the metric definition).
+2. Pull denominator event as a standalone count series (same window,
+   granularity, and filters).
+3. Compare each component's deviation % against the ratio's overall
+   deviation %. Flag which component moved.
+4. If both components moved in the same direction by similar magnitude →
+   the ratio is stable but volumes shifted. Note as a volume story, not a
+   conversion story.
+5. If only one moved, or they moved in opposite directions → the ratio
+   shift is concentration-driven. Identify which component drives it.
+
+### For `funnel`
+1. Run the **same funnel definition** twice as `report_type=funnels` via
+   `Run-Query`: once for the recent (drift/anomaly) window, once for the
+   baseline window. The native funnels response returns step conversion
+   rates and absolute counts per step.
+2. For each step pair, compute the conversion-rate delta between recent
+   and baseline.
+3. Flag the **specific step pair** with the largest absolute conversion
+   drop. One step usually owns the drop; surface that pair as the
+   headline finding.
+4. If the funnel has step-level filters (e.g. property filters on
+   individual steps), do not decompose into standalone event counts —
+   the filters change the meaning. The native funnels query is the only
+   faithful comparison.
+
+This replaces the prior "pull each funnel step as a standalone event
+count" approach. Standalone event counts ignore step ordering and
+step-level filters; the native funnels report does not.
+
+### For `retention`
+1. Pull the cohort-defining event as a standalone count series.
+2. Pull the return event as a standalone count series.
+3. Check whether cohort size changed, return count changed, or both.
+4. A drop in retention with stable return count + larger cohort is a mix
+   effect; a drop in return count with stable cohort is real attrition.
+
+### Event × metric-definition filter combinations
+
+For every component event above, re-run it with **each filter from the
+metric definition applied independently** (i.e. one filter at a time, not
+all combinations — combinatorial blowup is not useful here). This shows
+whether a specific filter value concentrates the movement.
+
+Example: if the metric definition has `user_type = premium` baked in,
+and the numerator event is `video_play`, run:
+- `video_play` with no filter
+- `video_play` with `user_type = premium` (the baked filter) — this
+  should match the metric's numerator
+- `video_play` broken down **by** `user_type` (all values) — exposes
+  whether the movement is specific to `premium` or shared across the
+  population.
+
+Cap at 5 filter values per property breakdown; drop the long tail.
+
+---
+
+## Branch 2 — Default-property breakdowns
+
+Two-level cascade. Always runs.
+
+### Level 1 — Source segmentation
+
+Break down the metric by the SDK / ingestion source. Two properties
+together:
+
+- Event property `mp_lib` (string) — SDK name (e.g. `web`, `android`,
+  `iphone`, `swift`, `python`, `ruby`, `java`).
+- Event property `$import` (boolean) — true for events ingested via the
+  Import API, false for Track API.
+
+Output: a matrix of `mp_lib × $import` with deviation % per cell. The
+goal here is to isolate whether the movement is concentrated in
+client-side vs server-side vs Import API ingestion.
+
+### Level 2 — Conditional breakdowns
+
+The Level 2 slice depends on what Level 1 surfaced. Run only the slice
+that matches the source owning the movement; skip the others.
+
+**For client-side sources (`web`, `android`, `iphone`, `swift`, etc.):**
+Common first slice — geography in a step function:
+- Event property `$os`
+- Event property `platform` (or the project's equivalent; check the
+  metric definition or fall back to `mp_lib` if not present)
+- Event property `mp_country_code`
+- Event property `$region`
+- Event property `$city`
+
+Run the geography properties as a **step function**, not a cross-product:
+start with `mp_country_code`. If one country owns >50% of the movement,
+break that country down by `$region`. If one region owns >50%, break by
+`$city`. Stop when the concentration flattens.
+
+**For `web` specifically:**
+- Event property `$device`
+- Event property `utm_source`
+- Event property `$browser`
+
+**For `android` / `iphone` / `swift` / `ios`:**
+- Event property `$app_version_string`
+- Event property `$model`
+
+Run these as single-property breakdowns, not two-level (avoids the
+high-cardinality two-level truncation risk that bites large projects).
+
+### Cardinality discipline
+
+- Any breakdown returning exactly 1,000 / 3,000 / 10,000 rows is
+  potentially truncated — flag in findings, do not treat the result as
+  exhaustive.
+- If a two-level breakdown (`mp_lib × $import`) is used, keep the
+  first-level cardinality bounded: if `mp_lib` returns >20 distinct
+  values, filter to the top 10 by volume before running the second
+  level.
+
+---
+
+## Branch 3 — Distinct-ID outliers
+
+Only runs for anomaly payloads. Goal: is a small set of users
+responsible for the flagged point(s)?
+
+### Cardinality gate
+
+Before running, check in-window distinct user count against the metric's
+base query. If >10,000 distinct users contributed to the metric in the
+flagged window, skip this branch and note "Branch 3 skipped — user
+cardinality too high for outlier detection via MCP." A top-N breakdown
+on 100k users returns noise.
+
+### If within cardinality
+
+1. Break the metric down by `distinct_id` for the flagged window only
+   (not the whole series — this keeps the query tractable).
+2. Rank users by their contribution to the metric in the flagged window.
+3. Flag outliers: users whose contribution in the flagged window is
+   more than 5σ above the median user's contribution, OR users who
+   appear in the flagged window but not in the baseline window.
+4. Cap output at the top 20 distinct_ids by deviation.
+
+If the top 5 users account for >30% of the movement → strong user-driven
+outlier signal. Surface this prominently. Could be bots, internal test
+traffic, or a single high-volume customer.
+
+### Optional follow-up — session replay context
+
+If the top 3 distinct_ids each account for ≥10% of the movement individually,
+offer the user a follow-up: *"Top user(s) `<distinct_id>` drove [X]% of the
+flagged window. Want me to pull their session replays from that window so
+you can see what they did?"*
+
+If the user says yes, call `Get-User-Replays-Data` for each flagged
+distinct_id with `from_date` and `to_date` set to the flagged window. Cap at
+3 distinct_ids and 5 replays per user. Surface the replay URLs + timestamps
+in the findings card under the Branch 3 section.
+
+This is **opt-in only** — do not pull replays automatically. Replays add
+value when the customer wants the "what did they actually do" answer, but
+they're noisy if Session Replay isn't widely enabled in the project. Ask
+once, run if confirmed, skip if declined.
+
+---
+
+## Branch 4 — Cohort comparison
+
+Goal: is the movement concentrated in a specific user cohort the customer
+already cares about? Cohorts are typically the most CSA-actionable RCA
+signal — "your churn-risk cohort dropped 40%" is a far better headline than
+"users on iOS 17.4 dropped 40%."
+
+### Step 1 — Identify candidate cohorts
+
+The Mixpanel MCP surface has **no cohort-listing tool** — `Search-Entities`
+does not support a `cohort` entity type (its types are insights, funnels,
+flows, retention, dashboard, launch-analysis, experiments, feature-flags,
+metric-trees, playlists, heat-maps). Branch 4 therefore cannot auto-discover
+cohorts; source them from the user instead:
+
+1. If the user named cohorts in their original ask (e.g. "is this happening
+   in our power users?"), use those.
+2. Otherwise, ask once: *"Want me to compare against any saved cohorts? If
+   so, name them (or share their cohort IDs) and I'll filter the metric to
+   each."*
+
+If the user names no cohorts (or declines) → record *"Branch 4 skipped — no
+cohorts named; cohort auto-discovery isn't available on the MCP surface."*
+and continue.
+
+### Step 2 — Resolve the named cohorts
+
+Cap at the **top 5 cohorts** the user named. For each, resolve its
+`cohort_id` — the user may give a name or an id; if only a name is given,
+confirm it back before filtering. If the user named more than 5, ask which
+five matter most.
+
+Surface the cohort names in the findings — the customer recognizes their
+own cohort names and that's part of the value.
+
+### Step 3 — Run the metric filtered by each cohort
+
+For each selected cohort, run the same `query_template` as the headline
+metric, with one cohort-membership filter added. The exact filter shape
+comes from `Get-Query-Schema` — Mixpanel's query schema accepts cohort
+membership as a filter on `distinct_id` referencing the cohort_id.
+
+Run all cohort queries in parallel via concurrent `Run-Query` calls. Each
+query covers the same date window the source command used (drift window
+or anomaly window).
+
+### Step 4 — Score and rank
+
+For each cohort, compute the same concentration + deviation scores used
+in the Phase 2 ranking step (cohort_delta_abs / total_delta_abs and the
+cohort's own deviation %). Treat cohorts as candidate findings the same
+way property breakdowns are treated.
+
+A cohort is **important** if either:
+- It explains ≥30% of the headline movement (lower threshold than the
+  default 40% — cohorts are smaller slices than top-level properties,
+  and 30% concentration in a named cohort is a strong signal), OR
+- Its individual deviation is ≥1.5× the headline metric's deviation.
+
+### Error handling
+
+| Situation | Response |
+|---|---|
+| User names no cohorts | Skip branch, record reason. |
+| A cohort filter fails in `Run-Query` (cohort schema mismatch) | Retry once. If still failing, skip that cohort, continue others, note in branch coverage. |
+| All cohort queries fail | Skip branch, note "Branch 4 skipped — cohort filtering failed across all cohorts." |
+
+---
+
+## Branch 5 — Calendar context
+
+Only runs if the user provided `business_context`.
+
+1. Identify the key dates in the flagged window. For anomaly payloads,
+   use the timestamps from `payload.flags.hourly` and `payload.flags.daily`.
+   For drift payloads, use the change-point date if `shape = step`, or
+   the start of the drift window otherwise.
+2. Run a `web_search` with a query built from `business_context` + the
+   relevant date(s). Example: if `business_context = "Indian e-commerce"`
+   and the change-point is `2026-03-08`, search `"Indian e-commerce
+   events March 8 2026 festival sale"`. If `web_search` isn't available in
+   this runtime, skip Branch 5 and record *"Branch 5 skipped — web search
+   unavailable in this runtime"* (mirrors the no-`business_context` skip);
+   the other four branches still run.
+3. Look for matches: religious festivals, cricket fixtures, sale events
+   (Big Billion Days, End of Season Sale, Great Online Shopping Festival),
+   product launches, regulatory dates (e.g. RBI policy announcements).
+4. If a plausible match surfaces, include it in findings with a
+   confidence label: `strong` (exact date match, major event), `moderate`
+   (same week, category-aligned), `weak` (same month, tangential).
+5. If nothing surfaces, record: *"No calendar events found for
+   `<business_context>` on the flagged dates."*
+
+This branch is **context**, not **evidence**. Phrase findings as "the
+flagged date falls on [event]" — never as "the [event] caused the
+movement." Correlation only; causation belongs to the customer.
+
+---
+
+## Phase 2 — Synthesise, rank, visualise
+
+### Rank findings
+
+For every branch, each sub-segment (a `mp_lib` value, a country, a funnel
+step, a distinct_id, etc.) is a candidate finding. Score each:
+
+- **Concentration score** — share of the total movement this segment
+  explains. `segment_delta_abs / total_delta_abs`. A segment with 70%
+  concentration is worth surfacing; 5% is not.
+- **Deviation score** — this segment's deviation % compared to its own
+  baseline. A segment that individually deviated 40% is stronger signal
+  than one that deviated 5%.
+
+Flag a finding as **"important"** if **either** of these is true:
+- Concentration score ≥ 0.4 (one segment owns ≥40% of the movement), OR
+- Segment deviation ≥ 1.5× the headline metric's deviation (the movement
+  concentrates here).
+
+Cap total important findings at 6. If more than 6 qualify, keep the top 6
+by concentration × deviation combined rank.
+
+### Visualise important findings
+
+Render a single visualizer widget containing one chart per important
+finding, stacked vertically. Chart type by branch:
+
+| Branch | Chart |
+|---|---|
+| Branch 1 (component) | Two-line overlay: headline metric vs component metric, same window, same granularity |
+| Branch 2 (property breakdown) | Horizontal bar chart, one bar per segment, bar length = deviation %, color-coded by direction |
+| Branch 3 (distinct_id) | Horizontal bar chart, top-N users by contribution % in flagged window |
+| Branch 4 (cohort) | Horizontal bar chart, one bar per important cohort, bar length = deviation %, color-coded by direction |
+| Branch 5 (calendar) | No chart — rendered as an annotation in the written findings block |
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]`
+once if not already loaded this session. Do not narrate the read_me call.
+
+### The findings card
+
+```
+METRIC: <metric_name> — <project_id>
+DIAGNOSIS SOURCE: <metric-anomaly | metric-drift | both>
+WINDOW: <window described in the same language as the source verdict card>
+
+━━ HEADLINE ━━
+<one sentence naming the strongest finding, or "No single segment concentrates the movement — treat as distributed.">
+
+━━ IMPORTANT FINDINGS (ranked) ━━
+1. [Branch N] <segment description> — <concentration %> of movement,
+   <deviation %> vs baseline. <one-line interpretation>.
+2. ...
+(cap 6; omit section if no important findings)
+
+━━ BRANCH COVERAGE ━━
+Branch 1 (component):        <ran | skipped — reason>
+Branch 2 (default props):    <ran | skipped — reason>
+Branch 3 (distinct_id):      <ran | skipped — reason>
+Branch 4 (cohort):           <ran + N cohorts compared | skipped — reason>
+Branch 5 (calendar):         <ran + N events found | skipped — reason>
+
+━━ WHAT THIS ISN'T ━━
+This is attribution by segmentation, not causal analysis. Findings show
+where the movement concentrated; they do not prove what caused it.
+Calendar matches are correlation only.
+```
+
+### The RCA payload (passed back to SKILL.md)
+
+After rendering the findings card + charts, hand back to the skill-level
+flow:
+
+```
+{
+  command: "metric-rca",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  source_payload_command: "metric-anomaly" | "metric-drift",
+  business_context: <string or null>,
+  rca_queries: [
+    { branch: int, label: str, run_query_body: dict, result: dict }, ...
+  ],
+  important_findings: [
+    { branch: int, segment: str, concentration_pct: float,
+      deviation_pct: float, interpretation: str,
+      chart_spec: dict },
+    ... (cap 6)
+  ],
+  findings_card: <full rendered card above>,
+  headline: <the HEADLINE line>,
+  diagnosis_board_id: <from source payload, or null>
+}
+```
+
+The skill-level flow (Step 3 in `SKILL.md`, added with this command)
+handles the board append.
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| No diagnosis payload in session | Stop. Tell user to run `metric-anomaly` or `metric-drift` first. |
+| A branch query fails | Retry once. If still failing, mark that branch partial, continue others, note in branch coverage. |
+| All branches fail | Stop. Report failure and ask the user to verify project access. |
+| Branch 2 Level 1 returns only one `mp_lib × $import` cell with meaningful volume | Skip Branch 2 Level 2 conditional logic; run the geography step function (`mp_country_code` → `$region` → `$city`) directly. |
+| User declines to provide `business_context` | Skip Branch 5 entirely, proceed with others. |
+| `web_search` unavailable in this runtime | Skip Branch 5, record "Branch 5 skipped — web search unavailable." Other branches continue. |
+| No important findings after ranking (all segments <40% concentration and <1.5× deviation) | Surface that finding: "Movement is distributed across segments — no single dimension concentrates it." This is a valid, useful result. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not re-run anomaly or drift detection.** It consumes the payload.
+- **Does not claim causation.** Correlation by segmentation is the ceiling.
+- **Does not cross-join properties combinatorially.** Branch 2 is a
+  step-function cascade, not a cross-product, because high-cardinality
+  two-level breakdowns truncate silently.
+- **Does not source calendar dates from memory.** Always `web_search`
+  with the user-provided `business_context` (skips gracefully if web search
+  is unavailable).
+- **Does not create a new board.** Appends to the existing diagnosis
+  board via the skill-level flow.
+
+Keep the surface narrow. A ranked list of 3-6 concentrated segments with
+charts beats a 40-branch exhaustive report every time.
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000700000001 b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000700000001
new file mode 100644
index 0000000..11a1684
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000700000001
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires Mixpanel MCP.
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the Mixpanel MCP. Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both detection commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `Mixpanel MCP:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `Mixpanel MCP:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `Mixpanel MCP:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches, list the candidates and ask the user to pick. If no match, list the available projects and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `Mixpanel MCP:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both detection commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `Mixpanel MCP:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once, scoped to the events used by `query_template`
+(`event_name=<event>` for each), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift, whose weekly
+view is the longest window; 30 days back for anomaly). If issues exist
+(type drift, null spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly`, `metric-drift`) produce a
+structured verdict, not a data dump. Each defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both detection commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (the anomaly run was skipped and redirected to `metric-drift`, so there is nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — no literal newline
+     characters; each element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000700000002 b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000700000002
new file mode 100644
index 0000000..11a1684
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000700000002
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires Mixpanel MCP.
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the Mixpanel MCP. Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both detection commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `Mixpanel MCP:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `Mixpanel MCP:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `Mixpanel MCP:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches, list the candidates and ask the user to pick. If no match, list the available projects and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `Mixpanel MCP:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both detection commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `Mixpanel MCP:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest date the
+diagnosis will look at (16 weeks back for drift, whose weekly view is its
+longest window; 30 days back for anomaly). If issues exist (type drift,
+null spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly` and `metric-drift`) produce a
+structured verdict, not a data dump. Each defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (anomaly detection was skipped in favor of `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card`, HTML-formatted
+     with `Create-Dashboard`'s allowed tags (`<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc.). Do not embed newline
+     characters; each element already renders on its own line.
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000800000003 b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000800000003
new file mode 100644
index 0000000..c6ec4b8
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000800000003
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires Mixpanel MCP.
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the Mixpanel MCP. Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest date the
+diagnosis will look at (16 weeks back for drift, whose weekly view is its
+longest window; 30 days back for anomaly). If issues exist (type drift,
+null spikes, schema changes) in that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands (`metric-anomaly` and `metric-drift`) produce a
+structured verdict, not a data dump. Each defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (anomaly detection was skipped in favor of `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card`, HTML-formatted
+     with `Create-Dashboard`'s allowed tags (`<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc.). Do not embed newline
+     characters; each element already renders on its own line.
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000900000004 b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000900000004
new file mode 100644
index 0000000..a14f131
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000900000004
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp` connector (Mixpanel US).
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp` connector (Mixpanel US). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both detection commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both detection commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once, scoped to the events used by `query_template`
+(`event_name=<event>` for each), with `since_date` set to the earliest
+date the diagnosis will look at (16 weeks back for drift, 30 days back for
+anomaly). If issues exist (type drift, null spikes, schema changes) in
+that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both detection commands produce a structured verdict, not a data dump. Each
+command defines its own output format; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both detection commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (anomaly was skipped in favor of `metric-drift` — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — do not insert newline
+     characters; each element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000a00000005 b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000a00000005
new file mode 100644
index 0000000..a79b6dd
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/.fuse_hidden0000000a00000005
@@ -0,0 +1,459 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp` connector (Mixpanel US).
+---
+
+# Monitor Metrics
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp` connector (Mixpanel US). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both detection commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both detection commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `Get-Properties` with
+   `property_names=[<filter_property>]` and `resource_type=<Event|User>`.
+   If it doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest date the
+diagnosis will look at (16 weeks back for drift, 30 days back for
+anomaly). If issues exist (type drift, null spikes, schema changes) in
+that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both commands produce a structured verdict, not a data dump. The commands
+define their own output formats; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (was skipped to drift — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — no literal newline
+     characters; each element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/SKILL.md b/plugins/mixpanel-mcp/skills/monitor-metrics/SKILL.md
new file mode 100644
index 0000000..357d2f7
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/SKILL.md
@@ -0,0 +1,462 @@
+---
+name: monitor-metrics
+description: >
+  Monitor and diagnose a Mixpanel metric for anomalies, drift, and root
+  cause. Use whenever the user asks to investigate, debug, monitor, or
+  explain a change in a Mixpanel metric — a saved Metric, KPI, conversion
+  rate, retention, event count, funnel step, or anything tracked in a saved
+  report or dashboard. Trigger phrases: "monitor [metric]", "what's going on
+  with [metric]", "why did [metric] drop/spike", "diagnose this metric",
+  "check for anomalies", "has [metric] drifted", "is this metric stable",
+  "something looks off", "did [metric] change last month", "what's driving
+  the drop", "where is the movement coming from", "run RCA on this metric".
+  Also trigger when the user shares a Mixpanel report/dashboard/metric link
+  and asks what's happening, or describes a metric in prose and wants to know
+  if the movement is real. Do NOT trigger for portfolio health checks (use
+  `weekly-pulse`) or adoption reports (use `gtm-customer-intelligence`).
+  Requires the `mixpanel-mcp` connector (Mixpanel US).
+---
+
+# Monitor Metrics
+
+> **Connector:** This skill operates exclusively against the `mixpanel-mcp` connector (Mixpanel US region). Every Mixpanel MCP tool call in this SKILL.md and in every file under `commands/` must be routed through `mixpanel-mcp` — never any other Mixpanel connector.
+
+A focused diagnostic skill for a single metric at a time. Works for any
+project the user has access to. Requires the `mixpanel-mcp` connector (Mixpanel US). Answers three
+questions cleanly:
+
+1. **Is a recent point weird?** (anomaly detection — `metric-anomaly`)
+2. **Has the baseline itself shifted?** (drift detection — `metric-drift`)
+3. **Where is the movement coming from?** (root-cause attribution —
+   `metric-rca`)
+
+Separation matters because the customer conversation is different for each:
+an anomaly is an incident, drift is a trend, and RCA is the segmentation
+story that makes either of the first two actionable.
+
+`metric-rca` runs on top of an existing anomaly or drift diagnosis — it
+consumes the diagnosis payload, fans out across segmentation branches, and
+appends its findings to the diagnosis board. It does not perform detection
+itself.
+
+---
+
+## Commands
+
+This skill has three commands. Route to the right one based on the user's
+ask.
+
+### `metric-anomaly`
+Detect point-in-time anomalies — recent spikes, drops, and clusters in a
+single metric. Uses time-bucketed Z-score + IQR tests against 7-day hourly
+and 30-day daily series. Produces flagged timestamps, classification
+(isolated / cluster / edge), and a verdict. **Does not** test for
+trend-level drift.
+
+Trigger when the user wants to know *whether a specific point looks weird* —
+"is this spike real?", "did something happen yesterday?", "is this a blip?".
+
+→ See `commands/metric-anomaly.md`
+
+### `metric-drift`
+Detect trend-level drift — whether the baseline has shifted. Runs mean-shift
+and variance-ratio tests on 60-day daily (last 30 vs prior 30) and 16-week
+weekly (last 8 vs prior 8) windows. Includes a lightweight outlier
+contamination check so it can run standalone without `metric-anomaly`
+first. Produces direction, magnitude, shape (step/slope/oscillating), and
+a verdict. **Does not** flag individual point anomalies.
+
+Trigger when the user wants to know *whether the trend has changed* —
+"has this drifted?", "is the baseline different now?", "what's happened over
+the last month?".
+
+→ See `commands/metric-drift.md`
+
+### `metric-rca`
+Root-cause attribution on top of an existing anomaly or drift diagnosis.
+Fans out across five branches — component decomposition, default-property
+breakdowns, distinct-id outliers, cohort comparison, and calendar/market
+context — over the same date windows the source command used. Ranks findings
+by concentration and deviation, renders charts for the important ones, and
+appends results to the diagnosis board.
+
+Trigger when the user wants to know *where the movement came from* —
+"what's driving this drop?", "where is the spike concentrated?", "break
+this down", "run RCA", "is it a specific segment?". Requires a prior
+`metric-anomaly` or `metric-drift` run in the same session.
+
+→ See `commands/metric-rca.md`
+
+---
+
+## Choosing between the commands
+
+- **Ambiguous or exploratory ask** ("something looks off") → default to
+  `metric-anomaly` first. Anomaly is cheaper (2 queries) and catches
+  point-in-time issues that would contaminate a drift test.
+- **"Has this changed over the last month?"** → `metric-drift` directly.
+- **Both detection questions matter** → run `metric-anomaly` first, then
+  `metric-drift`. Drift will pick up any anomaly context if present and
+  downgrade confidence accordingly.
+- **User asks "why" or "where" after seeing a verdict** → `metric-rca`.
+- **User opens with "why did X drop"** → run `metric-anomaly` or
+  `metric-drift` first (whichever fits their framing better), then flow
+  into `metric-rca`. Do not run RCA cold — it needs the detection payload.
+
+---
+
+## Step 0 — Input validation (both commands)
+
+**Do not skip this step.** Before touching Step 1 or anything downstream,
+confirm the user has given both a project and a metric. If either is
+missing, ask once and wait.
+
+### Step 0a — Resolve org/project context first
+
+Before validating the project, call `mixpanel-mcp:Get-Business-Context`
+**once per session**. Pass `project_id` if the user already gave one;
+otherwise call without it. This returns:
+
+- Org-specific vocabulary (project nicknames, internal acronyms, product
+  terms) that may resolve the user's request without needing `Get-Projects`.
+- Project-specific guidance on how that customer queries their data
+  (relevant for any project with established conventions).
+
+If business context resolves the project name → proceed directly to the
+metric validation step. If not → fall through to `Get-Projects`.
+
+Skip this call only if the user's input is unambiguous (a numeric
+`project_id` plus a clearly-named saved metric/report, with no project name
+to interpret).
+
+### Validate the project
+
+| Situation | Action |
+|---|---|
+| User gave a `project_id` (int) | Call `mixpanel-mcp:Get-Projects`, find the matching entry, and confirm the project **name** back to the user in one line: *"Running on project `<name>` (id: `<project_id>`) — confirm?"*. Wait for confirmation. |
+| User gave a project **name** only | Call `mixpanel-mcp:Get-Projects`, find the match. If one match, resolve the id and confirm back. If multiple matches or no match, list the candidates and ask the user to pick. |
+| Neither given | Ask: *"Which Mixpanel project should I run this on? Share the project id, name, or a report/metric URL."* Do not guess from memory or past conversations. |
+
+Store the resolved `project_id` and `project_name` on the metric series object.
+
+### Validate the metric
+
+Resolve in this priority order. **Saved Mixpanel Metrics are the preferred
+input** — they carry a complete, machine-readable definition (see Step 1).
+
+| Situation | Action |
+|---|---|
+| User named a metric, or said "metric" generically | Call `mixpanel-mcp:List-Metrics` with `project_id` and `query=<name>`. If one saved Metric matches, confirm the resolved name back to the user. If several match, list and ask. If none match, fall through to the other shapes below (saved report / prose). |
+| User gave a metric **id** | Treat as a saved Metric. Confirm via `Get-Metric` in Step 1. |
+| User gave a report URL, `bookmark_id`, or dashboard URL | Resolve via the Step 1 input-shape table. Confirm the resolved metric name and one-sentence definition back to the user before firing queries. |
+| User described the metric in prose | Still call `List-Metrics` once to check whether a saved Metric already captures it — reuse beats rebuild. If no match, confirm the prose definition back to the user in one sentence before firing queries. |
+| Nothing given | Ask: *"Which metric are we diagnosing? Share a saved Metric name, a report URL, a bookmark id, or describe it in one line."* Do not assume from context. |
+
+Only proceed once both project and metric are confirmed.
+
+---
+
+## Step 1 — Metric ingestion (both commands)
+
+Resolve the metric into a single canonical form: a normalized **metric
+series** object whose `query_template` is the `report` body each command
+will replay at its own date windows.
+
+There are two ways `query_template` gets built. **Prefer the first.**
+
+### Path A — Saved Mixpanel Metric (preferred)
+
+A saved Metric is the only input shape that returns its **full definition**
+programmatically. Use it whenever Step 0 resolved a saved Metric.
+
+1. Call `mixpanel-mcp:Get-Metric` with `project_id` and `metric_id`.
+2. The response carries the complete metric structure — events, formulas,
+   filters, and aggregation. Lift this directly into `query_template`. You
+   do **not** need to reconstruct it from prose, and you do **not** need
+   `Get-Query-Schema` for a saved Metric — the definition is authoritative.
+3. Confirm the resolved metric **name** and a one-line plain-English summary
+   of what it measures back to the user before firing any time-series query.
+4. Record `metric_id` on the series object so a board or RCA run can
+   reference the source Metric.
+
+### Path B — Saved report, dashboard tile, or prose (rebuild)
+
+Used when there is no saved Metric. Here `query_template` must be **built
+fresh** and confirmed with the user, because these shapes do not expose a
+replayable query body.
+
+> **Important:** `Get-Report` returns report metadata + results at the
+> report's native granularity but **does not** return the underlying query
+> definition. Saved reports are only a starting point for confirming the
+> metric definition — every downstream `Run-Query` is built fresh from the
+> confirmed prose definition using `Get-Query-Schema`. (This is the key
+> difference from Path A: `Get-Metric` *does* return a replayable
+> definition; `Get-Report` does not.)
+
+#### Input shape resolution (Path B)
+
+| Input shape | How to recognize | How to resolve |
+|---|---|---|
+| **Saved report (with ID)** | A `bookmark_id` + `project_id`, or a report URL containing `/report/<project_id>/<bookmark_id>` | Call `Get-Report` with `skip_results=false`. From the metadata + native-granularity results, draft a one-sentence prose definition (event(s), measurement type, obvious filters). Confirm with the user. |
+| **Dashboard tile (with URL or ID)** | A dashboard URL containing `/dashboards/<dashboard_id>` | Call `Get-Dashboard` with `include_layout=true`, find the matching report cell, then treat as saved report (above). |
+| **Report/dashboard referenced by name only** | "the conversion tile on the funnel board" with no URL | Call `Search-Entities` with appropriate `entity_types` (`["dashboard"]` for boards; `["insights","funnels","retention","flows"]` for reports) and `query=<name>`. One match → resolve. Multiple → list and ask. None → ask for the URL. |
+| **Natural language** | User describes the metric in prose | Confirmation already done in Step 0. Proceed to query construction. |
+
+#### Build the query body (Path B)
+
+Once the metric definition is confirmed in prose:
+
+1. Determine `report_type` (`insights`, `funnels`, `retention`, or `flows`).
+2. Call `Get-Query-Schema` for that report type.
+3. Construct the `report` body — events, measurement, filters, breakdowns —
+   matching the prose definition. Do **not** copy from a saved report's raw
+   response; build from the schema.
+
+### Normalize to a "metric series" object internally
+
+```
+{
+  project_id: int,
+  project_name: str,             # resolved and confirmed in Step 0
+  metric_id: int | null,         # set when source is a saved Metric (Path A)
+  metric_name: str,              # human-readable label
+  metric_definition: str,        # one-sentence what-it-measures (confirmed)
+  report_type: str,              # insights | funnels | retention | flows
+  query_template: dict,          # `report` body (from Get-Metric or Get-Query-Schema)
+  default_filters: list,         # filters baked into query_template, for RCA reference
+}
+```
+
+Every downstream step operates on this object. Each command's Phase 1
+overrides only `dateRange` and `unit` (granularity) on `query_template`.
+
+**Funnel and retention classification** is owned by each command's own
+pre-flight (top of `commands/metric-anomaly.md` and `commands/metric-drift.md`),
+not by Step 1. Step 1 is deliberately narrow: resolve the metric into a
+normalized series object. Nothing more.
+
+---
+
+## Step 1.5 — Project profile resolution
+
+Before writing any time-series query, resolve a minimal project profile.
+This step is cheap (metadata calls only) and catches filter/instrumentation
+problems before they contaminate the diagnosis.
+
+### Filter resolution (cheap metadata calls, not probe queries)
+
+For every filter referenced in `query_template` (billing/account filters,
+exclusions, user-property filters, segment scopes):
+
+1. **Confirm the property exists.** Call `List-Properties` with
+   `names=[<filter_property>]` and `resource_type=<Event|User>` (pass
+   `events=[<event>]` to scope to a specific event's properties). If it
+   doesn't resolve, stop and tell the user — the filter references a
+   property that doesn't exist in this project.
+2. **Confirm the filter value is real.** Call `Get-Property-Values` with
+   the property name and (for event properties) the relevant event. If the
+   filter value isn't in the returned distinct values, stop and tell the
+   user — the filter excludes everything because the value never appears.
+
+Skip this for filters that came from a saved Metric definition (Path A) and
+are already known-good — but still validate any filter the *user* added on
+top of the saved Metric.
+
+### Instrumentation health check
+
+Call `Get-Issues` once per event used by `query_template`
+(`event_name=<event>`), with `since_date` set to the earliest date the
+diagnosis will look at (16 weeks back for drift, 30 days back for
+anomaly). If issues exist (type drift, null spikes, schema changes) in
+that window:
+
+- Capture issue summaries.
+- Do **not** abort the diagnosis. Carry these forward to the verdict card
+  under contamination — a separate signal from the statistical
+  contamination check. The customer needs to know if instrumentation
+  changed during the window even if the metric itself looks stable.
+
+### Two-level breakdown truncation note
+
+Two-level breakdowns can return truncated result sets on high-cardinality
+dimensions. Treat any result that looks suspiciously round (e.g. exactly
+1,000 / 3,000 / 10,000 rows and no tail) as potentially truncated and
+confirm before relying on it. Mainly an RCA Branch 2 concern but applies
+anywhere a two-level breakdown is run.
+
+Store as `project_profile` for downstream use:
+```
+{
+  filters_validated: list,           # filters confirmed to resolve
+  instrumentation_issues: list,      # issues from Get-Issues, may be empty
+  truncation_warnings: list,         # populated by downstream branches
+}
+```
+
+---
+
+## Output contract
+
+Both commands produce a structured verdict, not a data dump. The commands
+define their own output formats; common principles:
+
+- **Default to compact.** A CSA scanning between calls needs a verdict in under 60 seconds. Full detail is opt-in.
+- **Always chart the trend.** Both commands always render inline charts — whether anomalies/drift were detected or not. A stable metric gets the same charts; the visual confirmation of stability is just as valuable as flagging a problem. Annotation overlays (anomaly dots, drift window shading, change-point markers) only appear when something was flagged.
+- **Fixed section order.** Headline → confidence → next step. Never lead with a hedge.
+- **Explicit scope limits.** Every output names what it did *not* do ("this does not test for drift — run `metric-drift`"; "this does not flag individual anomalies — run `metric-anomaly`").
+
+Never output a wall of tables or raw query results. The CSA is the audience,
+and the goal is a verdict they can act on.
+
+---
+
+## Step 2 — Post-diagnosis handoff (both commands)
+
+At the end of Phase 3, each command hands back a structured **diagnosis
+payload** to the skill-level flow. The skill then offers the user a board,
+and caches the payload in conversation memory for a future `metric-rca`
+command.
+
+### The diagnosis payload
+
+Both commands return the same shape:
+
+```
+{
+  command: "metric-anomaly" | "metric-drift",
+  project_id: int,
+  project_name: str,
+  metric_id: int | null,
+  metric_name: str,
+  metric_definition: str,
+  metric_type: str,
+  queries: [
+    { label: str, window: str, granularity: str, run_query_body: dict, result: dict },
+    ...
+  ],
+  verdict_card: str,       # the full rendered card from Phase 3
+  headline: str,           # one-line summary from the card
+  flags: dict              # command-specific (flagged points for anomaly; level_delta / var_ratio / shape for drift)
+}
+```
+
+This payload is held in conversation memory only — do not write to disk.
+It survives for the session and is what `metric-rca` consumes when
+invoked. If the user later creates a board (below), the resulting
+`board_id` is attached to the payload as `diagnosis_board_id` so
+`metric-rca` knows where to append.
+
+### The board prompt
+
+After rendering the Phase 3 charts + verdict card, ask the user **exactly
+once**:
+
+> *"Want me to save this as a board in Mixpanel?"*
+
+Do not offer the prompt if either of these is true:
+- The command aborted in error handling (no usable verdict).
+- The metric is `retention` and the command was `metric-anomaly` (was skipped to drift — nothing to board).
+
+### If the user says yes
+
+Create a dashboard in the same `project_id`. Use `Create-Dashboard` directly
+— this case (one board, N reports, one text card) is simple enough that
+delegating to a dashboard-manager skill adds unnecessary indirection.
+
+Build the rows as follows:
+
+1. **Run each query in `queries[]` first** with `skip_results=true` to
+   register them and get their `query_id`s back. Do this in parallel.
+2. **Assemble the dashboard rows:**
+   - Row 1: a single text cell containing `verdict_card` (HTML-formatted
+     using `Create-Dashboard`'s allowed tags: `<h2>`, `<h3>`, `<p>`,
+     `<strong>`, `<ul>`, `<li>`, `<br>`, etc. — no literal newline
+     characters; each element already renders on its own line).
+   - Row 2 onwards: one report cell per query in `queries[]`, named
+     `<metric_name> — <window>, <granularity>` (matching the chart titles
+     from Phase 3).
+3. **Call `Create-Dashboard`** with `title=<metric_name> — <command>
+   diagnosis (YYYY-MM-DD)`, the rows above, and the user's project_id.
+
+Return the board URL to the user when done, and **store the resulting
+`board_id` back onto the diagnosis payload as `diagnosis_board_id`** so a
+subsequent `metric-rca` run can append to it.
+
+For the **append** path at Step 3 (adding RCA findings to an existing
+board), use `Get-Dashboard` (with `include_layout=true`) → `Update-Dashboard`
+to add cells without disturbing the existing layout.
+
+### If the user says no
+
+Do nothing. The payload is already in conversation memory; `metric-rca`
+will pick it up when invoked later in the session.
+
+---
+
+## Step 3 — Post-RCA board append
+
+Runs after `metric-rca` returns its payload (see `commands/metric-rca.md`
+Phase 2). The RCA payload carries `important_findings`, `findings_card`,
+and `rca_queries` — Step 3's job is to append these to the existing
+diagnosis board without creating a new one.
+
+### Append target
+
+Read `diagnosis_board_id` from the source payload (the anomaly/drift
+payload that RCA consumed).
+
+- **If present** → append to that board. This is the default path.
+- **If null** (the user declined the board earlier) → do not create a
+  board silently. Return the findings card + charts inline and tell the
+  user: *"No diagnosis board was created earlier, so I'm not appending
+  anywhere. Want me to create a board now with the diagnosis + RCA
+  findings together?"* If they say yes, follow Step 2's board-creation
+  path first, then run Step 3 against the new board.
+
+### What to append
+
+Use `Get-Dashboard` (`include_layout=true`) → `Update-Dashboard` to append.
+The content to add, in order:
+
+1. **One text card** containing `findings_card` verbatim. Place it
+   beneath the existing Phase 3 verdict card (visual continuity: diagnosis
+   first, then attribution).
+2. **One saved report per important finding** — use `chart_spec` +
+   `run_query_body` from the RCA payload's `rca_queries`. Name each
+   `<metric_name> — RCA: <segment description>` so the board reads as a
+   story: headline → verdict → findings → per-segment charts.
+
+Cap appended reports at 6 (matches the RCA findings cap). If there are
+zero important findings, append only the text card — the "no single
+segment concentrates the movement" result is still worth boarding.
+
+### Do not offer a second prompt
+
+RCA's append to an existing board is automatic — do not ask *"should I
+append?"*. The user already opted into the board at Step 2. The only ask
+at Step 3 is the fallback above, when no board exists yet.
+
+Return the updated board URL when done.
+
+---
+
+## When not to use this skill
+
+- **Portfolio-wide sweeps** → use `weekly-pulse`.
+- **Full adoption story / QBR prep** → use `gtm-customer-intelligence`.
+- **Lexicon / instrumentation health** → use `manage-lexicon`.
+- **Metric definition help** ("how should I measure X?") → answer directly, no skill needed.
+- **Root-cause investigation from scratch, without a prior diagnosis** →
+  run `metric-anomaly` or `metric-drift` first, then `metric-rca`. RCA
+  does not run cold.
+
+This skill is deliberately narrow: one metric, one diagnosis, one
+attribution pass.
+
+---
+
+## Files
+
+- `commands/metric-anomaly.md` — point-in-time anomaly detection (Z-score + IQR, time-bucketed; 2 queries; 7-day hourly + 30-day daily views)
+- `commands/metric-drift.md` — trend-level drift detection (mean shift + variance ratio; 2 queries; 60-day daily + 16-week weekly views; owns shape classification)
+- `commands/metric-rca.md` — root-cause attribution (5-branch segmentation fan-out on same windows as source command; ranks findings by concentration × deviation; appends to the diagnosis board)
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-anomaly.md b/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-anomaly.md
new file mode 100644
index 0000000..6c2bafe
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-anomaly.md
@@ -0,0 +1,236 @@
+# Command: metric-anomaly
+
+Detect point-in-time anomalies in a single metric — recent spikes, drops, and
+clusters. Produces a verdict on *whether* something unusual happened at a
+specific moment. Does **not** test for trend-level drift (run `metric-drift`
+for that).
+
+---
+
+## Prerequisites
+
+Before this command runs, Steps 0, 1, and 1.5 from `SKILL.md` must have
+completed — input validation, normalized metric series object, and project
+profile resolution. If any of those haven't happened, do them first.
+
+If the user's input is a saved report but the metric is a **funnel** or
+**retention** report, see the "Special cases" section at the bottom.
+
+### Prerequisite — classify `metric_type`
+
+Before firing any queries, classify the metric into one of:
+`count`, `unique_count`, `ratio`, `funnel`, `retention`, `unknown`.
+
+| Detected | Classification |
+|---|---|
+| Report type `funnels` | `funnel` |
+| Report type `retention` | `retention` |
+| Query template has A/B form or `% of total` (conversion rate, session rate, etc.) | `ratio` |
+| Single-series count (event count, event count distinct users) | `count` |
+| Single-series unique count | `unique_count` |
+| Formula metric / custom SQL / anything else | `unknown` |
+
+Store as `metric_type` on the metric series object. Used in the verdict card
+and in special-case routing (funnel, retention).
+
+> _Keep this classification table in sync with the identical block in
+> `metric-drift.md` — edits to one must be mirrored in the other._
+
+---
+
+## Phase 1 — Fetch series (2 queries, parallel)
+
+Fire both `Run-Query` calls simultaneously:
+
+| Query | Window | Granularity | Purpose |
+|---|---|---|---|
+| Q1-hourly | Last 7 days | `hour` | Recent-blip detection |
+| Q1-daily | Last 30 days | `day` | Recent-day detection against a fuller baseline |
+
+Use the `query_template` from the metric object; override only `dateRange`
+and `unit` (granularity). Do not re-apply filters — they're already baked in.
+
+Build the `Run-Query` body from `query_template` with only `dateRange` and
+`unit` (granularity) overridden. Use `timeComparison` when a single call can
+cover both windows.
+
+---
+
+## Phase 2 — Outlier tests (Z-score + IQR, time-bucketed)
+
+For each series independently, compute the expected range at every timestamp.
+Run **both** tests; flag a point if **either** test flags it. Report which
+test(s) caught each flag.
+
+### Test 1 — Z-score against time-bucketed mean
+
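+- For the **hourly** series: group all points by hour-of-day (0–23; 24 buckets of 7 points each). Do **not** also split by day-of-week — a 7-day window holds exactly one point per hour-of-day × day-of-week cell, so σ would be undefined. Compute mean (μ) and stddev (σ) per bucket across the 7-day window. Flag any point where `|value - μ| / σ > 2.5`.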
+- For the **daily** series: group by day-of-week (7 buckets). Compute μ and σ across the 30-day window. Flag any point where `|value - μ| / σ > 2.5`.
+- Handle low-variance buckets: if σ is <5% of μ, skip the Z-score for that bucket and fall back to IQR only (division by tiny σ creates false alarms).
+
+### Test 2 — IQR against time-bucketed median
+
+- Same bucketing scheme as Test 1.
+- For each bucket, compute Q1, median, Q3, and IQR = Q3 − Q1.
+- Flag any point where `value < Q1 − 1.5 × IQR` or `value > Q3 + 1.5 × IQR`.
+
+### Deviation magnitude
+
+For every flagged point, report `(value − median) / median` as a signed
+percentage. This is what the CSA actually cares about, not the Z-score itself.
+
+### Classify each flagged timestamp
+
+- **Isolated spike/drop** — one point flagged, neighbors normal. Most likely a real anomaly (outage, release, data gap).
+- **Cluster** — 2+ consecutive points flagged in the same direction. Could be a short incident *or* the leading edge of drift. Flag as ambiguous and note that `metric-drift` may be a better follow-up.
+- **Edge-of-window cluster** — flagged points are the most recent N points. Strongly suggestive of drift, not anomaly. Recommend running `metric-drift` before treating as an anomaly incident.
+
+---
+
+## Phase 3 — Summarise + charts + handoff
+
+Produces **three things**, in order:
+
+1. **A single visualizer widget with two charts stacked vertically**
+2. **A compact verdict card**
+3. **A diagnosis payload** handed back to the skill-level flow (Step 2 in
+   `SKILL.md`) for the board prompt and `metric-rca` caching
+
+### The charts — always rendered
+
+Both charts render regardless of whether anything was flagged. A stable chart
+is the visual proof of stability and saves the CSA from second-guessing.
+
+**Top chart: 7-day hourly view** (Q1-hourly series)
+- Line for the hourly series.
+- Dots for every flagged hourly point — red for drops, amber for spikes. Omit entirely if no flags.
+- Label the most recent flagged point inline with timestamp and deviation %.
+- Title: `<metric_name> — last 7 days, hourly`.
+
+**Bottom chart: 30-day daily view** (Q1-daily series)
+- Line for the daily series.
+- Dots for every flagged daily point — red for drops, amber for spikes. Omit entirely if no flags.
+- Label the most recent flagged point inline with timestamp and deviation %.
+- Title: `<metric_name> — last 30 days, daily`.
+
+Both charts share x-axis type (date/time) but not range — render as two
+separate plots in one widget, stacked, with consistent y-axis formatting.
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]` once if
+not already loaded this session. Do not narrate the read_me call to the user.
+
+If chart generation fails, fall back to card-only output with the note
+"Chart unavailable — card below." Do not block on the chart.
+
+### The compact verdict card
+
+```
+METRIC: <metric_name> — <project_id>
+DEFINITION: <one-sentence what-it-measures>
+
+━━ ANOMALY VERDICT ━━
+Hourly series (7d):  <Clean | N flagged | Edge cluster — possible drift>
+Daily series (30d):  <Clean | N flagged | Edge cluster — possible drift>
+
+━━ TOP FLAGS ━━
+<timestamp>  <value>  <deviation %>  [isolated | cluster | edge]  (z-score | IQR | both)
+<timestamp>  <value>  <deviation %>  [isolated | cluster | edge]  (z-score | IQR | both)
+... (cap 5; omit section entirely if no flags)
+
+━━ HEADLINE ━━
+<one sentence the CSA could paste into a customer Slack>
+
+━━ CONFIDENCE ━━
+<high | medium | low> — <reason for any hedge>
+
+━━ NEXT STEP ━━
+<one concrete action>
+
+━━ WHAT THIS ISN'T ━━
+This is point-in-time anomaly detection only. Trend-level drift is not
+tested here — run `metric-drift` for that.
+```
+
+#### Headline phrasing discipline
+
+- No flags: "Metric is stable at the point-in-time level — no anomalies in the last 7 or 30 days."
+- Isolated flag(s): "Metric had a [spike/drop] of X% on [date]. Baseline otherwise stable."
+- Cluster or edge cluster: "Metric has [N] anomalies concentrated in the last [window] — likely the leading edge of drift. Recommend running `metric-drift` next."
+
+Never lead with a confidence hedge. State the finding, then qualify it.
+
+If >10 flags total across both series, cap the TOP FLAGS list at 5 entries
+sorted by absolute deviation magnitude descending and add a note to the headline:
+"[N] anomalies flagged across the 7-day and 30-day views — the metric is either undergoing a
+regime shift or the baseline model is wrong. Run `metric-drift` before
+treating any single point as actionable."
+
+### The diagnosis payload
+
+After rendering the charts and verdict card, assemble the payload defined
+in `SKILL.md` Step 2 and hand it back to the skill-level flow:
+
+```
+{
+  command: "metric-anomaly",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  queries: [
+    { label: "Q1-hourly", window: "last 7 days", granularity: "hour",
+      run_query_body: <body used>, result: <series> },
+    { label: "Q1-daily",  window: "last 30 days", granularity: "day",
+      run_query_body: <body used>, result: <series> }
+  ],
+  verdict_card: <full rendered card above>,
+  headline: <the HEADLINE line from the card>,
+  flags: {
+    hourly: [ { timestamp, value, deviation_pct, classification, test } , ... ],
+    daily:  [ { timestamp, value, deviation_pct, classification, test } , ... ]
+  }
+}
+```
+
+The skill-level flow (Step 2 in `SKILL.md`) then asks the user about the
+board and caches the payload for `metric-rca`. Do **not** ask the board
+question from inside this command — that lives at the skill level so a
+user running anomaly → drift back-to-back gets asked once at the end,
+not twice.
+
+---
+
+## Special cases
+
+**Funnel metrics:** The hourly view is usually too noisy for a multi-step
+funnel at low volume. Drop Q1-hourly and run Q1-daily only (last 14 days
+instead of 30 to stay lightweight). Note in output: "Hourly anomaly detection
+skipped — funnel volume too low at hourly granularity."
+
+**Retention metrics:** Retention is a rolling cohort metric — point-in-time
+anomaly detection mostly doesn't apply. Tell the user directly and recommend
+`metric-drift` instead, which has a cohort-over-cohort fallback for retention.
+
+**Very low-volume metrics (<100 events/day):** Skip Q1-hourly and run
+Q1-daily only — the Poisson noise floor dominates at hourly granularity.
+State this in the output.
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| Either query fails | Retry once. If still failing, mark that series partial, continue the other, note in output. |
+| Both queries fail | Stop. Report the failure and ask the user to verify project access. |
+| Project requires a filter the user didn't provide | Ask once, then proceed. Don't guess. |
+| Metric returns zero events in window | Stop. The metric is either broken or the filter excludes everything. Report as a possible data quality issue; do not proceed to Phase 2. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not test for trend-level drift.** That's `metric-drift`.
+- **Does not attribute cause.** Root-cause investigation is out of scope for this command — run `metric-rca` after detection.
+- **Does not produce recommendations beyond "run drift" / "run RCA".** The verdict is the product.
+
+Keep the surface narrow. A clean anomaly verdict in under 30 seconds is more
+useful than a sprawling analysis that tries to do everything.
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-drift.md b/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-drift.md
new file mode 100644
index 0000000..12e9456
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-drift.md
@@ -0,0 +1,319 @@
+# Command: metric-drift
+
+Detect trend-level drift in a single metric — whether the baseline itself has
+shifted over recent weeks. Produces a verdict on *whether* the metric is in a
+new regime. Does **not** test for point-in-time anomalies (run `metric-anomaly`
+for that).
+
+---
+
+## Prerequisites
+
+Before this command runs, Steps 0, 1, and 1.5 from `SKILL.md` must have
+completed — input validation, normalized metric series object, and project
+profile resolution. If any of those haven't happened, do them first.
+
+If the user's input is a saved report but the metric is a **funnel** or
+**retention** report, see the "Special cases" section at the bottom.
+
+### Prerequisite — classify `metric_type`
+
+Before firing any queries, classify the metric into one of:
+`count`, `unique_count`, `ratio`, `funnel`, `retention`, `unknown`.
+
+| Detected | Classification |
+|---|---|
+| Report type `funnels` | `funnel` |
+| Report type `retention` | `retention` |
+| Query template has A/B form or `% of total` (conversion rate, session rate, etc.) | `ratio` |
+| Single-series count (event count, event count distinct users) | `count` |
+| Single-series unique count | `unique_count` |
+| Formula metric / custom SQL / anything else | `unknown` |
+
+Store as `metric_type` on the metric series object. Used in the verdict card
+and in special-case routing (funnel, retention).
+
+> _Keep this classification table in sync with the identical block in
+> `metric-anomaly.md` — edits to one must be mirrored in the other._
+
+### Prerequisite — name the drift and baseline windows
+
+The naming convention used throughout this command's output:
+
+- **`drift_window`** — the **recent** 30 days (most recent 30 days ending today).
+- **`baseline_window`** — the **prior** 30 days (30 days ending 30 days before today).
+
+Both windows are computed from Q1-daily. The weekly test uses 8 vs 8 weeks —
+those windows are reported alongside but are secondary to the daily windows
+for headline purposes.
+
+---
+
+## Phase 1 — Fetch series (2 queries, parallel)
+
+Fire both `Run-Query` calls simultaneously:
+
+| Query | Window | Granularity | Comparison |
+|---|---|---|---|
+| Q1-daily | Last 60 days | `day` | Last 30 days vs. prior 30 days |
+| Q1-weekly | Last 16 weeks | `week` | Last 8 weeks vs. prior 8 weeks |
+
+The 60-day daily view catches medium-term drift. The 16-week weekly view
+catches slow drift that the daily window would miss because daily noise
+drowns the signal. Running both is cheap and they answer different questions.
+
+Use the `query_template` from the metric object; override only `dateRange`
+and `unit` (granularity). Do not re-apply filters — they're already baked in.
+
+---
+
+## Phase 2 — Drift tests (mean shift + variance ratio)
+
+### Window split & contamination check
+
+For each series, split into `recent` and `prior` halves (no overlap).
+
+**Lightweight anomaly contamination check** (important because this command
+can run standalone without `metric-anomaly` having run first):
+
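+Scan the `recent` window for obvious outliers using a simple robust rule — any
+point more than 3 robust σ (1.4826 × MAD) from the window median. (Do not use the window's own mean/σ: by Chebyshev's inequality at most ~11% of points can sit more than 3σ from their own mean, so the 20% threshold would never fire.) If ≥20% of points in the `recent` window
+qualify → flag **"drift test potentially contaminated by outliers in the
+recent window"** and mark all drift findings as low-confidence. Recommend the
+user run `metric-anomaly` first.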
+
+If fewer than 20% of points qualify, proceed normally but note the count in the
+verdict card's contamination section.
+
+This is deliberately lighter than `metric-anomaly`'s full time-bucketed
+test — its job here is only to flag contamination risk, not to produce a
+publishable anomaly verdict.
+
+### Test 1 — Mean shift (level drift)
+
+```
+mean_recent  = mean(recent_window)
+mean_prior   = mean(prior_window)
+level_delta  = (mean_recent − mean_prior) / mean_prior    # signed %
+```
+
+Flag thresholds:
+- `|level_delta| < 5%` → no meaningful shift
+- `5% ≤ |level_delta| < 15%` → moderate drift
+- `|level_delta| ≥ 15%` → significant drift
+
+Additionally compute a Welch's t-test on the two windows. If p < 0.05 and
+`|level_delta| ≥ 5%`, drift is statistically supported. If p ≥ 0.05, note the
+shift is observational but not statistically distinguishable from noise.
+
+### Test 2 — Variance ratio (volatility drift)
+
+```
+var_ratio = variance(recent_window) / variance(prior_window)
+```
+
+Flag thresholds:
+- `0.67 ≤ var_ratio ≤ 1.5` → variance stable
+- `var_ratio > 1.5` → metric got noisier (investigate instrumentation, cohort mix)
+- `var_ratio < 0.67` → metric got smoother (often a sign of flatlining or saturation)
+
+Variance drift without level drift is an under-appreciated signal — the
+headline number looks fine but something structural changed. Always surface
+it separately.
+
+Distribution-shape tests (KS, PSI) are intentionally **not** part of this
+battery. They require per-user or per-segment values, which Mixpanel's MCP
+surface does not return at practical cost.
+
+### Combine into a per-series verdict
+
+| Verdict | When |
+|---|---|
+| **No drift** | Level stable AND variance stable |
+| **Level drift** | Level shifted ≥5%, variance stable |
+| **Variance drift** | Level stable, variance ratio outside 0.67–1.5 |
+| **Compound drift** | Both |
+
+Also report **direction** (up / down) and **magnitude** (% for level, ratio
+for variance).
+
+### Reconcile the two series
+
+The 60-day-daily and 16-week-weekly views should agree on direction. If they
+disagree:
+
+- **Weekly says drift, daily says none** → slow drift that daily noise hides. Trust the weekly.
+- **Daily says drift, weekly says none** → recent movement that hasn't accumulated into the weekly window yet. Could be the leading edge of real drift, or a contained incident. Trust the daily but note the weekly hasn't confirmed.
+- **Both agree** → high confidence, state it.
+
+### Classify drift shape
+
+If drift is flagged, classify its shape using the daily series for use in
+the verdict card:
+
+| Condition | `verdict_shape` value |
+|---|---|
+| Single-day change point where mean shift before vs after explains ≥60% of variance, and before/after segments are each <20% within-segment variance | `step` (record the change-point date) |
+| Linear regression fit to the full 60-day series has R² ≥ 0.5 and non-zero slope | `slope` |
+| 7-day autocorrelation on residuals ≥ 0.5, and periodicity strength differs between drift and baseline windows | `oscillating` |
+| None of the above fit cleanly | `unclassified` |
+
+**Shape precedence**: if multiple shapes fit, use this priority:
+`step` > `slope` > `oscillating` > `unclassified`. (Step changes are the
+most actionable; surface them first when ambiguous.)
+
+If no drift was flagged, skip shape classification entirely.
+
+---
+
+## Phase 3 — Summarise + charts + handoff
+
+Produces **three things**, in order:
+
+1. **A single visualizer widget with two charts stacked vertically**
+2. **A compact verdict card**
+3. **A diagnosis payload** handed back to the skill-level flow (Step 2 in
+   `SKILL.md`) for the board prompt and `metric-rca` caching
+
+### The charts — always rendered
+
+Both charts render regardless of whether drift was detected. A stable chart
+is the visual proof of stability.
+
+**Top chart: 60-day daily view** (Q1-daily series)
+- Line for the daily series.
+- **Shaded band** for the prior 30-day baseline window (subtle grey fill).
+- **Shaded band** for the recent 30-day drift window — red-tinted fill if drift is `down`, green-tinted if `up`, amber-tinted if `mixed`, grey if no drift.
+- Horizontal line for `mean_prior` (dashed grey).
+- Horizontal line for `mean_recent` (dashed, colored to match drift direction).
+- If `verdict_shape = step`, annotate the change-point date with a vertical dashed line.
+- Title: `<metric_name> — last 60 days, daily`.
+
+**Bottom chart: 16-week weekly view** (Q1-weekly series)
+- Line for the weekly series.
+- **Shaded band** for the prior 8-week baseline window (subtle grey fill).
+- **Shaded band** for the recent 8-week drift window — same direction-based coloring as above.
+- Horizontal lines for `mean_prior_weekly` (dashed grey) and `mean_recent_weekly` (dashed, colored).
+- Title: `<metric_name> — last 16 weeks, weekly`.
+
+Both charts share x-axis type (date) and consistent y-axis formatting.
+Render as two separate plots in one widget, stacked.
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]` once if
+not already loaded this session. Do not narrate the read_me call to the user.
+
+If chart generation fails, fall back to card-only output with the note
+"Chart unavailable — card below." Do not block on the chart.
+
+### The compact verdict card
+
+```
+METRIC: <metric_name> — <project_id>
+DEFINITION: <one-sentence what-it-measures>
+
+━━ DRIFT VERDICT ━━
+60-day / daily view:   <verdict>  <direction>  <magnitude>  (t-test p = <p>)
+16-week / weekly view: <verdict>  <direction>  <magnitude>
+Reconciled verdict:    <one sentence>
+Shape:                 <step | slope | oscillating | unclassified>  <change-point date if step>
+
+━━ CONTAMINATION ━━
+<none | recent window contains N outliers — drift confidence downgraded; recommend metric-anomaly first>
+
+━━ HEADLINE ━━
+<one sentence the CSA could paste into a customer Slack>
+
+━━ CONFIDENCE ━━
+<high | medium | low> — <reason for any hedge>
+
+━━ NEXT STEP ━━
+<one concrete action>
+
+━━ WHAT THIS ISN'T ━━
+This is trend-level drift detection only. Point-in-time anomalies are not
+tested here — run `metric-anomaly` for that.
+```
+
+#### Headline phrasing discipline
+
+- No drift: "Metric is stable — trend has not shifted in the last 30 days or 8 weeks."
+- Level drift: "Metric has drifted [up/down] by X% over the last 30 days. [Weekly view confirms / Weekly view hasn't confirmed yet]."
+- Variance drift only: "Metric level is stable but volatility has [increased/decreased] — variance ratio [X.XX]. Something structural changed without moving the headline."
+- Compound drift: "Metric has drifted [up/down] by X% AND volatility changed. Compound drift — investigate both level and structure."
+- Contamination flag: append "Drift confidence is low — recent window has N outlier points. Run `metric-anomaly` first to clean up before attributing."
+
+Never lead with a confidence hedge. State the finding, then qualify it.
+
+### The diagnosis payload
+
+After rendering the charts and verdict card, assemble the payload defined
+in `SKILL.md` Step 2 and hand it back to the skill-level flow:
+
+```
+{
+  command: "metric-drift",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  queries: [
+    { label: "Q1-daily",  window: "last 60 days",  granularity: "day",
+      run_query_body: <body used>, result: <series> },
+    { label: "Q1-weekly", window: "last 16 weeks", granularity: "week",
+      run_query_body: <body used>, result: <series> }
+  ],
+  verdict_card: <full rendered card above>,
+  headline: <the HEADLINE line from the card>,
+  flags: {
+    daily:  { verdict, direction, level_delta, var_ratio, t_test_p, shape, change_point_date },
+    weekly: { verdict, direction, level_delta, var_ratio },
+    reconciled: <one-line reconciled verdict>,
+    contamination: { outlier_count, contaminated: bool }
+  }
+}
+```
+
+The skill-level flow (Step 2 in `SKILL.md`) then asks the user about the
+board and caches the payload for `metric-rca`. Do **not** ask the board
+question from inside this command — that lives at the skill level so a
+user running anomaly → drift back-to-back gets asked once at the end,
+not twice.
+
+---
+
+## Special cases
+
+**Funnel metrics:** Phase 1 and Phase 2 work as-is for multi-step funnels
+— the overall conversion series is what drifts. No special handling needed.
+
+**Retention metrics:** Retention is a rolling cohort metric — "drift" on a
+retention curve means cohort-over-cohort degradation. Replace the 60-day
+daily and 16-week weekly splits with a cohort-over-cohort comparison: last
+8 cohorts vs. prior 8 cohorts on the same retention day (D1, D7, D30). Flag
+which retention day shifted. Note in the verdict card: "Retention
+cohort-over-cohort comparison used in place of daily/weekly split."
+
+**Very low-volume metrics (<100 events/day):** The tests still apply but
+statistical confidence drops sharply. Downgrade confidence to `low` regardless
+of `level_delta` magnitude and note: "Low-volume metric — drift signal may be
+Poisson noise."
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| Either query fails | Retry once. If still failing, mark that series partial, continue the other, note in output. |
+| Both queries fail | Stop. Report the failure and ask the user to verify project access. |
+| Project requires a filter the user didn't provide | Ask once, then proceed. Don't guess. |
+| Metric returns zero events in window | Stop. The metric is either broken or the filter excludes everything. Report as a possible data quality issue; do not proceed to Phase 2. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not detect point-in-time anomalies.** That's `metric-anomaly`.
+- **Does not attribute cause.** Root-cause investigation is handled by `metric-rca` after detection.
+- **Does not produce recommendations beyond "run anomaly first" / "run RCA".** The verdict is the product.
+
+Keep the surface narrow. A clean drift verdict in under 60 seconds is more
+useful than a sprawling analysis that tries to do everything.
diff --git a/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-rca.md b/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-rca.md
new file mode 100644
index 0000000..ac2bf98
--- /dev/null
+++ b/plugins/mixpanel-mcp/skills/monitor-metrics/commands/metric-rca.md
@@ -0,0 +1,484 @@
+# Command: metric-rca
+
+Root-cause investigation for a flagged metric. Takes the diagnosis payload
+from a prior `metric-anomaly` or `metric-drift` run and fans out across a
+set of segmentation branches to localise *where* the movement concentrated.
+Produces a ranked list of findings and appends them to the diagnosis board
+the user already created.
+
+This command does **not** re-run anomaly or drift detection. It assumes the
+movement has already been established — its job is attribution, not
+detection.
+
+---
+
+## Prerequisites
+
+Before this command runs, the session must hold a **diagnosis payload** in
+conversation memory from an earlier `metric-anomaly` or `metric-drift` run
+(see `SKILL.md` Step 2). The payload carries the project, metric, metric
+type, date ranges, flagged points or drift windows, and the query bodies
+used.
+
+If no payload exists, do **not** attempt to run RCA from a cold start. Tell
+the user: *"RCA runs on top of an existing anomaly or drift diagnosis. Run
+`metric-anomaly` or `metric-drift` first, then come back here."* Stop.
+
+### Board state
+
+If the user persisted the diagnosis as a Mixpanel board (Step 2 in
+`SKILL.md`), the payload will include `diagnosis_board_id`. This command
+**appends** to that board — it does not create a new one. If no board was
+created, skip the append step at the end and just return the findings
+inline; do not silently create a new board.
+
+### Ask once — business / market context
+
+Before firing Branch 5, ask the user exactly once:
+
+> *"What business or market is this metric tied to? (e.g., Indian
+> e-commerce, Indian OTT streaming, SEA fintech.) I'll use this to check
+> whether the flagged dates line up with festivals, launches, or
+> category-specific events."*
+
+Hold the answer as `business_context`. If the user skips or says "not
+relevant", skip Branch 5 entirely — do not guess the market from project
+name or memory.
+
+---
+
+## Phase 1 — Branch selection + parallel fan-out
+
+Read the payload and decide which branches to run. Every branch runs
+against the **same date ranges** the source command used:
+
+- `metric-anomaly` payload → use 7-day hourly + 30-day daily windows.
+- `metric-drift` payload → use 60-day daily + 16-week weekly windows, with
+  recent vs prior window comparison preserved.
+
+If both payloads exist in the session (user ran anomaly then drift),
+prefer the drift payload's date ranges — RCA over a longer window is more
+useful — and annotate findings with the anomaly payload's flagged
+timestamps for cross-reference.
+
+### Branch selection matrix
+
+| Branch | Purpose | Runs when |
+|---|---|---|
+| **Branch 1 — Component decomposition** | Break ratio/funnel/retention into its component events + metric-definition filters | `metric_type ∈ {ratio, funnel, retention}` |
+| **Branch 2 — Default-property breakdowns** | Source → geography → client-specific split | Always |
+| **Branch 3 — Distinct-ID outliers** | Find whether a small set of users drove the movement | Anomaly payload only. Skip if in-window distinct user count >10k |
+| **Branch 4 — Cohort comparison** | Run the metric filtered to the cohorts the user names to find concentration in named user segments | The user named one or more cohorts (or referenced a cohort in their ask) |
+| **Branch 5 — Calendar context** | Check whether flagged dates line up with festivals, launches, category events in `business_context` | `business_context` provided |
+
+Run all selected branches **in parallel** via concurrent `Run-Query` calls.
+Each branch can issue multiple queries; batch within a branch sequentially
+if one query's result informs the next (Branch 2's second level depends
+on the first).
+
+---
+
+## Branch 1 — Component decomposition
+
+Only runs for `ratio`, `funnel`, and `retention` metrics. The question:
+*is the movement in the numerator, the denominator, or a specific step?*
+
+**If the metric came from a saved Mixpanel Metric** (`metric_id` is set on
+the payload), read the component events, formula, and filters straight from
+the `Get-Metric` definition rather than re-deriving them — the definition is
+authoritative and avoids guessing the numerator/denominator. Fall back to
+the derivation below only when no saved-Metric definition is available.
+
+### For `ratio`
+1. Pull numerator event as a standalone count series (same window,
+   granularity, and filters from the metric definition).
+2. Pull denominator event as a standalone count series (same window,
+   granularity, and filters).
+3. Compare each component's deviation % against the ratio's overall
+   deviation %. Flag which component moved.
+4. If both components moved in the same direction by similar magnitude →
+   the ratio is stable but volumes shifted. Note as a volume story, not a
+   conversion story.
+5. If only one moved, or they moved opposite directions → the ratio
+   shift is concentration-driven. Identify which.
+
+### For `funnel`
+1. Run the **same funnel definition** twice as `report_type=funnels` via
+   `Run-Query`: once for the recent (drift/anomaly) window, once for the
+   baseline window. The native funnels response returns step conversion
+   rates and absolute counts per step.
+2. For each step pair, compute the conversion-rate delta between recent
+   and baseline.
+3. Flag the **specific step pair** with the largest absolute conversion
+   drop. One step usually owns the drop; surface that pair as the
+   headline finding.
+4. If the funnel has step-level filters (e.g. property filters on
+   individual steps), do not decompose into standalone event counts —
+   the filters change the meaning. The native funnels query is the only
+   faithful comparison.
+
+This replaces the prior "pull each funnel step as a standalone event
+count" approach. Standalone event counts ignore step ordering and
+step-level filters; the native funnels report does not.
+
+### For `retention`
+1. Pull the cohort-defining event as a standalone count series.
+2. Pull the return event as a standalone count series.
+3. Check whether cohort size changed, return count changed, or both.
+4. A drop in retention with stable return count + larger cohort is a mix
+   effect; a drop in return count with stable cohort is real attrition.
+
+### Event × metric-definition filter combinations
+
+For every component event above, re-run it with **each filter from the
+metric definition applied independently** (i.e. one filter at a time, not
+all combinations — combinatorial blowup is not useful here). This shows
+whether a specific filter value concentrates the movement.
+
+Example: if the metric definition has `user_type = premium` baked in,
+and the numerator event is `video_play`, run:
+- `video_play` with no filter
+- `video_play` with `user_type = premium` (the baked filter) — this
+  should match the metric's numerator
+- `video_play` broken down **by** `user_type` (all values) — exposes
+  whether the movement is specific to `premium` or shared across the
+  population.
+
+Cap at 5 filter values per property breakdown; drop the long tail.
+
+---
+
+## Branch 2 — Default-property breakdowns
+
+Two-level cascade. Always runs.
+
+### Level 1 — Source segmentation
+
+Break down the metric by the SDK / ingestion source. Two properties
+together:
+
+- Event property `mp_lib` (string) — SDK name (e.g. `web`, `android`,
+  `iphone`, `swift`, `python`, `ruby`, `java`).
+- Event property `$import` (boolean) — true for events ingested via the
+  Import API, false for Track API.
+
+Output: a matrix of `mp_lib × $import` with deviation % per cell. The
+goal here is to isolate whether the movement is concentrated in
+client-side vs server-side vs Import API ingestion.
+
+### Level 2 — Conditional breakdowns
+
+The Level 2 slice depends on what Level 1 surfaced. Run the slice whose
+dominant source owns the movement; skip the others.
+
+**For client-side sources (`web`, `android`, `iphone`, `swift`, etc.):**
+Common first slice — geography in a step function:
+- Event property `$os`
+- Event property `platform` (or the project's equivalent; check the
+  metric definition or fall back to `mp_lib` if not present)
+- Event property `mp_country_code`
+- Event property `$region`
+- Event property `$city`
+
+Run `$os` and `platform` as single-property breakdowns. Run geography as a
+**step function**, not a cross-product: start with `mp_country_code`. If one
+country owns >50% of the movement, break that country down by `$region`. If
+one region owns >50%, break by `$city`. Stop when the concentration flattens.
+
+**For `web` specifically:**
+- Event property `$device`
+- Event property `utm_source`
+- Event property `$browser`
+
+**For `android` / `iphone` / `swift` / `ios`:**
+- Event property `$app_version_string`
+- Event property `$model`
+
+Run these as single-property breakdowns, not two-level (avoids the
+high-cardinality two-level truncation risk that bites large projects).
+
+### Cardinality discipline
+
+- Any breakdown returning exactly 1,000 / 3,000 / 10,000 rows is
+  potentially truncated — flag in findings, do not treat the result as
+  exhaustive.
+- If a two-level breakdown (`mp_lib × $import`) is used, keep the
+  first-level cardinality bounded: if `mp_lib` returns >20 distinct
+  values, filter to the top 10 by volume before running the second
+  level.
+
+---
+
+## Branch 3 — Distinct-ID outliers
+
+Only runs for anomaly payloads. Goal: is a small set of users
+responsible for the flagged point(s)?
+
+### Cardinality gate
+
+Before running, check in-window distinct user count against the metric's
+base query. If >10,000 distinct users contributed to the metric in the
+flagged window, skip this branch and note "Branch 3 skipped — user
+cardinality too high for outlier detection via MCP." A top-N breakdown
+on 100k users returns noise.
+
+### If within cardinality
+
+1. Break the metric down by `distinct_id` for the flagged window only
+   (not the whole series — this keeps the query tractable).
+2. Rank users by their contribution to the metric in the flagged window.
+3. Flag outliers: users whose contribution in the flagged window is more
+   than 5σ above the median user's contribution, OR users who appear in
+   the flagged window but not in the baseline window.
+4. Cap output at the top 20 distinct_ids by deviation.
+
+If the top 5 users account for >30% of the movement → strong user-driven
+outlier signal. Surface this prominently. Could be bots, internal test
+traffic, or a single high-volume customer.
+
+### Optional follow-up — session replay context
+
+If any of the top 3 distinct_ids individually accounts for ≥10% of the movement,
+offer the user a follow-up: *"Top user(s) `<distinct_id>` drove [X]% of the
+flagged window. Want me to pull their session replays from that window so
+you can see what they did?"*
+
+If the user says yes, call `Get-User-Replays-Data` for each flagged
+distinct_id with `from_date` and `to_date` set to the flagged window. Cap at
+3 distinct_ids and 5 replays per user. Surface the replay URLs + timestamps
+in the findings card under the Branch 3 section.
+
+This is **opt-in only** — do not pull replays automatically. Replays add
+value when the customer wants the "what did they actually do" answer, but
+they're noisy if Session Replay isn't widely enabled in the project. Ask
+once, run if confirmed, skip if declined.
+
+---
+
+## Branch 4 — Cohort comparison
+
+Goal: is the movement concentrated in a specific user cohort the customer
+already cares about? Cohorts are typically the most CSA-actionable RCA
+signal — "your churn-risk cohort dropped 40%" is a far better headline than
+"users on iOS 17.4 dropped 40%."
+
+### Step 1 — Identify candidate cohorts
+
+The Mixpanel MCP surface has **no cohort-listing tool** — `Search-Entities`
+does not support a `cohort` entity type (its types are insights, funnels,
+flows, retention, dashboard, launch-analysis, experiments, feature-flags,
+metric-trees, playlists, heat-maps). Branch 4 therefore cannot auto-discover
+cohorts; source them from the user instead:
+
+1. If the user named cohorts in their original ask (e.g. "is this happening
+   in our power users?"), use those.
+2. Otherwise, ask once: *"Want me to compare against any saved cohorts? If
+   so, name them (or share their cohort IDs) and I'll filter the metric to
+   each."*
+
+If the user names no cohorts (or declines) → record *"Branch 4 skipped — no
+cohorts named; cohort auto-discovery isn't available on the MCP surface."*
+and continue.
+
+### Step 2 — Resolve the named cohorts
+
+Cap at the **top 5 cohorts** the user named. For each, resolve its
+`cohort_id` — the user may give a name or an id; if only a name is given,
+confirm it back before filtering. If the user named more than 5, ask which
+five matter most.
+
+Surface the cohort names in the findings — the customer recognizes their
+own cohort names and that's part of the value.
+
+### Step 3 — Run the metric filtered by each cohort
+
+For each selected cohort, run the same `query_template` as the headline
+metric, with one cohort-membership filter added. The exact filter shape
+comes from `Get-Query-Schema` — Mixpanel's query schema accepts cohort
+membership as a filter on `distinct_id` referencing the cohort_id.
+
+Run all cohort queries in parallel via concurrent `Run-Query` calls. Each
+query covers the same date window the source command used (drift window
+or anomaly window).
+
+### Step 4 — Score and rank
+
+For each cohort, compute the same concentration + deviation scores used
+in the Phase 2 ranking step (cohort_delta_abs / total_delta_abs and the
+cohort's own deviation %). Treat cohorts as candidate findings the same
+way property breakdowns are treated.
+
+A cohort is **important** if either:
+- It explains ≥30% of the headline movement (lower threshold than the
+  default 40% — cohorts are smaller slices than top-level properties,
+  and 30% concentration in a named cohort is a strong signal), OR
+- Its individual deviation is ≥1.5× the headline metric's deviation.
+
+### Error handling
+
+| Situation | Response |
+|---|---|
+| User names no cohorts | Skip branch, record reason. |
+| A cohort filter fails in `Run-Query` (cohort schema mismatch) | Retry once. If still failing, skip that cohort, continue others, note in branch coverage. |
+| All cohort queries fail | Skip branch, note "Branch 4 skipped — cohort filtering failed across all cohorts." |
+
+---
+
+## Branch 5 — Calendar context
+
+Only runs if the user provided `business_context`.
+
+1. Identify the key dates in the flagged window. For anomaly payloads,
+   use the timestamps from `payload.flags.hourly` and `payload.flags.daily`.
+   For drift payloads, use the change-point date if `shape = step`, or
+   the start of the drift window otherwise.
+2. Run a `web_search` with a query built from `business_context` + the
+   relevant date(s). Example: if `business_context = "Indian e-commerce"`
+   and the change-point is `2026-03-08`, search `"Indian e-commerce
+   events March 8 2026 festival sale"`. If `web_search` isn't available in
+   this runtime, skip Branch 5 and record *"Branch 5 skipped — web search
+   unavailable in this runtime"* (mirrors the no-`business_context` skip);
+   the other branches still run.
+3. Look for matches: religious festivals, cricket fixtures, sale events
+   (BBD, EOSS, GOSF), product launches, regulatory dates (e.g. RBI policy
+   announcements).
+4. If a plausible match surfaces, include it in findings with a
+   confidence label: `strong` (exact date match, major event), `moderate`
+   (same week, category-aligned), `weak` (same month, tangential).
+5. If nothing surfaces, record: *"No calendar events found for
+   `<business_context>` on the flagged dates."*
+
+This branch is **context**, not **evidence**. Phrase findings as "the
+flagged date falls on [event]" — never as "the [event] caused the
+movement." Correlation only; causation belongs to the customer.
+
+---
+
+## Phase 2 — Synthesise, rank, visualise
+
+### Rank findings
+
+For every branch, each sub-segment (a `mp_lib` value, a country, a funnel
+step, a distinct_id, etc.) is a candidate finding. Score each:
+
+- **Concentration score** — share of the total movement this segment
+  explains. `segment_delta_abs / total_delta_abs`. A segment with 70%
+  concentration is worth surfacing; 5% is not.
+- **Deviation score** — this segment's deviation % compared to its own
+  baseline. A segment that individually deviated 40% is stronger signal
+  than one that deviated 5%.
+
+Flag a finding as **"important"** if **either** of these is true:
+- Concentration score ≥ 0.4 (one segment owns ≥40% of the movement), OR
+- Segment deviation ≥ 1.5× the headline metric's deviation (the movement
+  concentrates here).
+
+Cap total important findings at 6. If more than 6 qualify, keep the top 6
+by concentration × deviation combined rank.
+
+### Visualise important findings
+
+Render a single visualizer widget containing one chart per important
+finding, stacked vertically. Chart type by branch:
+
+| Branch | Chart |
+|---|---|
+| Branch 1 (component) | Two-line overlay: headline metric vs component metric, same window, same granularity |
+| Branch 2 (property breakdown) | Horizontal bar chart, one bar per segment, bar length = deviation %, color-coded by direction |
+| Branch 3 (distinct_id) | Horizontal bar chart, top-N users by contribution % in flagged window |
+| Branch 4 (cohort) | Horizontal bar chart, one bar per important cohort, bar length = deviation %, color-coded by direction |
+| Branch 5 (calendar) | No chart — rendered as an annotation in the written findings block |
+
+Before generating, read `visualize:read_me` with `modules: ["chart"]`
+once if not already loaded this session. Do not narrate the read_me call.
+
+### The findings card
+
+```
+METRIC: <metric_name> — <project_id>
+DIAGNOSIS SOURCE: <metric-anomaly | metric-drift | both>
+WINDOW: <window described in the same language as the source verdict card>
+
+━━ HEADLINE ━━
+<one sentence naming the strongest finding, or "No single segment concentrates the movement — treat as distributed.">
+
+━━ IMPORTANT FINDINGS (ranked) ━━
+1. [Branch N] <segment description> — <concentration %> of movement,
+   <deviation %> vs baseline. <one-line interpretation>.
+2. ...
+(cap 6; omit section if no important findings)
+
+━━ BRANCH COVERAGE ━━
+Branch 1 (component):        <ran | skipped — reason>
+Branch 2 (default props):    <ran | skipped — reason>
+Branch 3 (distinct_id):      <ran | skipped — reason>
+Branch 4 (cohort):           <ran + N cohorts compared | skipped — reason>
+Branch 5 (calendar):         <ran + N events found | skipped — reason>
+
+━━ WHAT THIS ISN'T ━━
+This is attribution by segmentation, not causal analysis. Findings show
+where the movement concentrated; they do not prove what caused it.
+Calendar matches are correlation only.
+```
+
+### The RCA payload (passed back to SKILL.md)
+
+After rendering the findings card + charts, hand back to the skill-level
+flow:
+
+```
+{
+  command: "metric-rca",
+  project_id, project_name, metric_id,
+  metric_name, metric_definition, metric_type,
+  source_payload_command: "metric-anomaly" | "metric-drift",
+  business_context: <string or null>,
+  rca_queries: [
+    { branch: int, label: str, run_query_body: dict, result: dict }, ...
+  ],
+  important_findings: [
+    { branch: int, segment: str, concentration_pct: float,
+      deviation_pct: float, interpretation: str,
+      chart_spec: dict },
+    ... (cap 6)
+  ],
+  findings_card: <full rendered card above>,
+  headline: <the HEADLINE line>,
+  diagnosis_board_id: <from source payload, or null>
+}
+```
+
+The skill-level flow (Step 3 in `SKILL.md`, added with this command)
+handles the board append.
+
+---
+
+## Error handling
+
+| Situation | Response |
+|---|---|
+| No diagnosis payload in session | Stop. Tell user to run `metric-anomaly` or `metric-drift` first. |
+| A branch query fails | Retry once. If still failing, mark that branch partial, continue others, note in branch coverage. |
+| All branches fail | Stop. Report failure and ask the user to verify project access. |
+| Branch 2 Level 1 returns only one `mp_lib × $import` cell with meaningful volume | Skip Branch 2 Level 2 conditional logic; run the geography step function (`mp_country_code` → `$region` → `$city`) directly. |
+| User declines to provide `business_context` | Skip Branch 5 entirely, proceed with others. |
+| `web_search` unavailable in this runtime | Skip Branch 5, record "Branch 5 skipped — web search unavailable." Other branches continue. |
+| No important findings after ranking (all segments <40% concentration and <1.5× deviation) | Surface that finding: "Movement is distributed across segments — no single dimension concentrates it." This is a valid, useful result. |
+
+---
+
+## What this command deliberately doesn't do
+
+- **Does not re-run anomaly or drift detection.** It consumes the payload.
+- **Does not claim causation.** Correlation by segmentation is the ceiling.
+- **Does not cross-join properties combinatorially.** Branch 2 is a
+  step-function cascade, not a cross-product, because high-cardinality
+  two-level breakdowns truncate silently.
+- **Does not source calendar dates from memory.** Always `web_search`
+  with the user-provided `business_context` (skips gracefully if web search
+  is unavailable).
+- **Does not create a new board.** Appends to the existing diagnosis
+  board via the skill-level flow.
+
+Keep the surface narrow. A ranked list of 3-6 concentrated segments with
+charts beats a 40-branch exhaustive report every time.