diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 90e6f1a..e44b5e6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,7 +9,7 @@ "plugins": [ { "name": "metabase-cli", - "description": "Drive a Metabase instance from the terminal via the `mb` CLI: auth, list/get/create/update/delete on every resource, run queries and transforms, git-sync content to and from a remote. Bundles transform and git-sync references as on-demand skills served by `mb skills get`.", + "description": "Be your data analyst / data engineer for Metabase, from the terminal via the `mb` CLI. Go from raw data to something a non-technical person can use: clean tables, reusable metrics, dashboards, and written answers. Use when someone wants to \"make sense of my data\", \"build a data model\", \"go from raw data to a dashboard\", \"answer questions about my data\", \"report on who registered / signed up / responded\", or \"set up analytics for X\". Also full CRUD on every Metabase resource (cards, dashboards, transforms, queries), git-sync content to and from a remote, and on-demand workflow skills served by `mb skills get`.", "source": "./", "strict": false, "skills": ["./skills/metabase-cli"], diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 42eb365..1f7a8ca 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -35,3 +35,12 @@ jobs: bun-version: latest - run: bun install - run: bun run format:check + + skills: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: stbenjam/skillsaw@v0 + with: + path: skill-data + strict: true diff --git a/.skillsaw.yaml b/.skillsaw.yaml new file mode 100644 index 0000000..d87361d --- /dev/null +++ b/.skillsaw.yaml @@ -0,0 +1,23 @@ +# skillsaw configuration — https://github.com/stbenjam/skillsaw +# Lints the skill collection under skill-data/. Pinned so a skillsaw +# release can't silently change token math and break CI. 
+ +version: "0.11.4" + +rules: + context-budget: + enabled: true + severity: warning + limits: + # Skill bodies are loaded into Claude's context when a skill fires. + # The default warn (3000) is tuned for lean skills; ours are dense + # reference/strategy skills whose de-fluffed floor is higher. The + # always-resident skills (core, robot-data-engineer) are the tightest + # of the set; the larger ones (data-transformation, semantic-layer, + # mbql) are leaf skills loaded for a single stage, where the extra + # tokens are genuine guidance, not fluff. 6000 clears the largest + # honest floor while still catching real future bloat. + skill: + warn: 6000 + skill-description: + warn: 200 diff --git a/README.md b/README.md index 09e9fd0..4154a3d 100644 --- a/README.md +++ b/README.md @@ -1338,12 +1338,15 @@ mb skills path core # one path Bundled skills: -| Name | Use | -| ----------- | -------------------------------------------------------------------------------------- | -| `core` | Top-level guide: auth, flag conventions, output flags, body input, every command group | -| `transform` | Authoring and running transforms (native SQL + MBQL 5), iteration, run inspection | -| `document` | Authoring document bodies: the TipTap JSON tree, embedding cards, entity links | -| `git-sync` | Round-tripping Metabase content to/from a git remote | +| Name | Use | +| --------------------- | --------------------------------------------------------------------------------------------- | +| `core` | Top-level guide: auth, flag conventions, output flags, body input, every command group | +| `transform` | Authoring and running transforms (native SQL + MBQL 5), iteration, run inspection | +| `data-transformation` | Raw, normalized source database → clean, wide, analysis-ready tables for a non-technical user | +| `semantic-layer` | Turning clean tables into reusable segments, measures, and metrics for a non-technical user | +| `robot-data-engineer` | Front-door router for the whole journey 
(raw → tables → definitions → dashboards) | +| `document` | Authoring document bodies: the TipTap JSON tree, embedding cards, entity links | +| `git-sync` | Round-tripping Metabase content to/from a git remote | Discovery surfaces: diff --git a/package.json b/package.json index 72b69d7..41f93bb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@metabase/cli", - "version": "0.1.10", + "version": "0.1.11", "description": "Metabase CLI", "license": "AGPL-3.0", "repository": { @@ -37,6 +37,7 @@ "typecheck": "tsc --noEmit", "lint": "oxlint", "lint:fix": "oxlint --fix", + "lint:skills": "uvx skillsaw lint skill-data/ --strict", "format": "oxfmt", "format:check": "oxfmt --check", "sync:representations": "bun run scripts/sync-representations.ts", diff --git a/skill-data/core/SKILL.md b/skill-data/core/SKILL.md index 377e07a..5e3c34e 100644 --- a/skill-data/core/SKILL.md +++ b/skill-data/core/SKILL.md @@ -6,7 +6,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # metabase-cli (core) -The official Metabase CLI (`mb`) drives a Metabase instance over its REST API. It covers auth, list/get/create/update/delete on every resource, query and transform execution, content search, git-sync (representations ↔ instance), and entity-id translation. +The official Metabase CLI (`mb`) drives a Metabase instance over its REST API: auth, list/get/create/update/delete on every resource, query and transform execution, content search, git-sync (representations ↔ instance), and entity-id translation. Top-level command groups (run `mb --help` to discover verbs): @@ -15,7 +15,7 @@ auth | db | table | field | query | card | dashboard | snippet | segment | measu document | transform | transform-job | setting | search | git-sync | setup | eid | uuid | upgrade | skills ``` -The patterns below — auth, flag conventions, output flags, body input — apply across **every** group. Per-command flags, examples, and output schemas live in `mb __manifest` (see below). 
A few flows have their own specialized skills; load them on demand (see "Specialized skills"). Authoring any query body (cards, transforms, measures, segments, ad-hoc `mb query`) is one — load `mbql` whenever you build MBQL by hand. When a **question** (card) needs a query, prefer MBQL over native SQL — it's portable across warehouse engines and the CLI pre-flight-validates it. Try it first, but don't force it: fall back to native SQL when MBQL can't express the query, or when an MBQL body keeps failing server-side and you can't resolve it. +The patterns below — auth, flag conventions, output flags, body input — apply across **every** group. Per-command flags, examples, and output schemas live in `mb __manifest` (see below). A few flows have their own specialized skills (see "Specialized skills"). When a card needs a query, prefer MBQL over native SQL (portable, pre-flight-validated) — load `mbql`; fall back to native SQL when MBQL can't express it. ## Auth & profiles @@ -29,11 +29,11 @@ mb auth status --json # → {profile, present, url} for the d mb auth status --profile --json # → status of a specific profile ``` -`auth list` is the primary enumeration path — one call returns every configured profile with sanitized URL, an `authenticated` flag, and a probe `status` (`ok` / `auth-failed` / `network-error` / `server-error` / `not-probed`). Use it before asking the user which profile to pick. If it returns an empty `data: []`, ask the user to run `mb auth login` themselves (see the policy above) and tell you the profile name. `auth status` is a single-profile health probe when you already know the name. +`auth list` is the primary enumeration path — one call returns every configured profile with sanitized URL, an `authenticated` flag, and a probe `status` (`ok` / `auth-failed` / `network-error` / `server-error` / `not-probed`). Use it before asking which profile to pick. 
If it returns an empty `data: []`, ask the user to run `mb auth login` themselves (see the policy above) and tell you the profile name. `auth status` is a single-profile health probe when you already know the name. ### Pick the profile to use -If exactly one profile is configured and the user's intent doesn't disambiguate, use it. If multiple profiles exist and the user hasn't named one, ask via `AskUserQuestion`, presenting the names from `auth list`. Once a name is established, pass `--profile ` to **every** subsequent command. Profile names are arbitrary local labels — `prod`, `staging` — let the user pick. +If exactly one profile is configured and intent doesn't disambiguate, use it. If multiple exist and the user hasn't named one, ask via `AskUserQuestion`, presenting the names from `auth list`. Once a name is established, pass `--profile ` to **every** subsequent command. Profile names are arbitrary local labels — `prod`, `staging` — let the user pick. ## Flag conventions @@ -52,14 +52,12 @@ If exactly one profile is configured and the user's intent doesn't disambiguate, ### Some outputs are JSON envelopes, not bare strings -A handful of "lookup" verbs return a JSON object even when you only want a single field. `mb setting get ` returns `{"key": "...", "value": ...}`, not the bare value. Don't drop them raw into another flag — extract: +A handful of "lookup" verbs return a JSON object even for a single field. `mb setting get ` returns `{"key": "...", "value": ...}`, not the bare value. Extract before reusing: ```bash VALUE=$(mb setting get --json | jq -r '.value') ``` -If you find yourself piping a `--json` envelope straight into another flag and the receiving command rejects it, this is what happened. - ## Output Every list/get verb supports the same output flags: @@ -94,26 +92,28 @@ Verbs that take a payload accept it from one of four sources, **first non-empty 3. stdin (auto-detected when piped, or explicit `--stdin` where supported) 4. 
positional argument -Picking exactly one is required; passing two of `--body` + `--file` + `--stdin` is rejected with a `ConfigError`. +Exactly one required; passing two of `--body` + `--file` + `--stdin` is rejected with a `ConfigError`. ```bash -cat > /tmp/body.json <<'EOF' +cat > ./.scratch/body.json <<'EOF' { ... } EOF -mb create --file /tmp/body.json --profile --json +mb create --file ./.scratch/body.json --profile --json ``` Single-quoted `'EOF'` prevents the shell from interpolating `$vars` inside the JSON. +Write these working files to **`./.scratch`** in the current directory (`mkdir -p ./.scratch` first), never `/tmp` — better permissions, they persist across the session, and the user can review them. + ## Discover the full surface: `mb __manifest` -For the canonical, machine-readable inventory of every command — name, description, per-command `details`, examples, every flag with type and default, and the output JSON Schema — run: +The canonical, machine-readable inventory of every command — name, description, per-command `details`, examples, every flag with type and default, and the output JSON Schema: ```bash mb __manifest ``` -The leading `__` hides it from `--help`, but it's stable. Reach for it instead of `--help` per command. It pairs with `jq`: +The leading `__` hides it from `--help`, but it's stable. Reach for it instead of `--help` per command — to enumerate verbs, validate flag names before constructing a command, or read an output schema before parsing. Pairs with `jq`: ```bash mb __manifest | jq -r '.commands[].command' # every command name @@ -122,43 +122,42 @@ mb __manifest | jq '.commands[] | select(.command == "card query") | .args' mb __manifest | jq '.commands[] | select(.command == "card list") | .outputSchema' # output schema before parsing ``` -Use it to (a) enumerate verbs, (b) validate flag names before constructing a command, (c) read an output schema before parsing. 
- ## Resource quirks worth memorizing -Routine verb shapes (list / get / create / update), every flag, and output JSON Schemas live in `mb __manifest` — pull them on demand. Below is only what the manifest does _not_ tell you: the footguns and non-obvious behaviors. +Routine verb shapes (list / get / create / update), every flag, and output JSON Schemas live in `mb __manifest` — pull on demand. Below is only what the manifest does _not_ tell you: footguns and non-obvious behaviors. - **db traversal vs. rollup.** Default to granular: `database list` → `database schemas ` → `database schema-tables ` → `table get --include fields`. The rollup endpoints (`database get --include tables.fields`, `database metadata `) pull megabytes and blow the context window on any real warehouse — use them only on a small/dev db. `sync-schema` / `rescan-values` queue async work and return `{status:"ok"}` immediately; `sync-schema --wait` blocks until `initial_sync_status: complete`. - **table fields.** `table get` never returns fields on its own — pass `--include fields` (compact) or use `table fields ` (list envelope). `table metadata ` adds FKs + dimensions (heavier). `table update` patches table-level metadata only; physical columns aren't editable here. - **field has no `list`.** Fields are per-table — get them via `table get --include fields`. Never enumerate fields across a whole db (context blow-up). `field summary` is live cardinality `{field_id, count, distincts}`; `field values` is the cached distinct set (`has_more_values: true` ⇒ truncated cache). `field update` patches metadata only; `base_type` isn't editable. - **card.** `dataset_query` is the **flat** `mbql/query` value, not a legacy `{type:"query",query:…}` envelope (→ `mbql` skill). `--export-format csv|xlsx` streams the raw export (pipe to a file), bypassing the JSON envelope. `archive` is the only delete; unarchive with `update --body '{"archived":false}'`. 
`visualization_settings` keys are scoped by `display` and aren't pre-flighted — see the `viz` skill. -- **dashboard.** Dashcards round-trip through `PUT /api/dashboard/:id` (no per-dashcard endpoint): `update-dashcard ` patches one safely; `update --body '{"dashcards":[…]}'` replaces the whole set (omitted ids are deleted server-side; use negative ids for new cards). `create` accepts the **same** `dashcards` array in its initial body, so you can lay out the whole dashboard in one call — negative ids for new cards, and `card_id:null` plus a `visualization_settings.virtual_card` block (`{display:"text"|"heading"|"link"|…}`) for non-question cards. `create`/`update` pre-flight every positive `card_id` against live server state and exit **2** with `{ok:false,errors:[…]}` on a bad ref — non-bypassable (no `--skip-validate`). `dashboard get ` (or `--full`) hydrates dashcards/tabs; `list` omits them. **Dashcard geometry: the grid is 24 columns wide.** Each dashcard's `{col, row, size_x, size_y}` is in grid units — `col` (0-indexed, left edge) and `size_x` are columns, `row`/`size_y` are rows; **full-width is `size_x: 24`** (`size_x: 12` is half a row — the usual cause of a card that only fills half the width, since it's a common per-chart default). Keep `col + size_x ≤ 24`, start each card's `col` at 0 for a full-width stack, and don't overlap cards (the server stores whatever you send — it won't auto-fix collisions). +- **dashboard.** Dashcards round-trip through `PUT /api/dashboard/:id` (no per-dashcard endpoint): `update-dashcard ` patches one safely; `update --body '{"dashcards":[…]}'` replaces the whole set (omitted ids are deleted server-side; negative ids for new cards). `create` accepts the **same** `dashcards` array in its initial body — lay out the whole dashboard in one call: negative ids for new cards, and `card_id:null` plus a `visualization_settings.virtual_card` block (`{display:"text"|"heading"|"link"|…}`) for non-question cards. 
`create`/`update` pre-flight every positive `card_id` against live server state and exit **2** with `{ok:false,errors:[…]}` on a bad ref — non-bypassable (no `--skip-validate`). `dashboard get ` (or `--full`) hydrates dashcards/tabs; `list` omits them. **Dashcard geometry: the grid is 24 columns wide.** Each dashcard's `{col, row, size_x, size_y}` is in grid units — `col` (0-indexed, left edge) and `size_x` are columns, `row`/`size_y` are rows; **full-width is `size_x: 24`** (`size_x: 12` is half a row — the usual cause of a card filling only half the width, since it's a common per-chart default). Keep `col + size_x ≤ 24`, start each card's `col` at 0 for a full-width stack, and don't overlap cards (the server stores whatever you send — it won't auto-fix collisions). - **snippet `--archived` is a swap, not a union** — list returns _either_ active _or_ archived rows, never both. (Same shape for `--filter archived` on dashboard/collection.) - **segment / measure** `update` and `archive` require a non-blank `revision_message` (audit-logged); the CLI does not synthesize it on `update`. `archive` defaults to `"Archived via mb CLI"` — override with `--revision-message`. `definition` is a flat MBQL clause (→ `mbql` skill): segment = a filter, measure = exactly one aggregation. - **collection ``** accepts four forms only — positive int, `root`, `trash`, or a 21-char entity_id — anything else is a client-side `ConfigError`. `collection items` auto-paginates (cap with `--limit`, which then omits `total`). `collection tree` is **JSON-only** — `--format text` is rejected. - **setting set** parses the value as **strict JSON**: a string is `'"value"'` (inner quotes), booleans `true`/`false`, numbers bare. Wrong quoting silently errors — confirm with `setting get ` after. `setting get --json` works on every value type (it wraps bare-text responses into `{key, value}`). - **search vs. 
list.** For plain enumeration of cards/dashboards/collections use the dedicated `… list` verbs; reach for `search --models ` only for ranking against a query string or a cross-resource lookup. - **transform.** Iterate with `transform update `, never `delete` + `create` — keeps the row, `entity_id`, materialized table, and YAML filename (avoids `_2` suffixes and noisy git history). `transform run` needs `--wait` (or `--sync`, which also waits for the run's output table to register and returns `target_table_id`) or you get only `{run_id, final:null}`. (→ `transform` skill.) -- **setup is one-shot.** `mb setup` walks `/api/setup` for a **fresh** instance only — it errors against an already-configured one. Mostly for bootstrapping local / e2e instances. +- **setup is one-shot.** `mb setup` walks `/api/setup` for a **fresh** instance only — errors against an already-configured one. Mostly for bootstrapping local / e2e instances. - **eid** translates a string entity id → numeric id: `mb eid --model --json` (EIDs are a positional used with `--model`; or pass `--body '{"entity_ids":{"card":["…"]}}'`). Entity ids are NanoIDs that can start with `-`, which the positional form misreads as a flag (shell quotes don't help — the `-` survives into argv). For an id that may start with `-`, use `--body` — the id is a JSON string value, immune to flag parsing: `mb eid --body '{"entity_ids":{"card":["-…"]}}'`. Useful when an external system hands you an entity id and a verb needs the numeric one. - **query / uuid.** `mb query` is the ad-hoc MBQL surface (`--print-schema` → `--dry-run` → run); `mb uuid --count ` mints the `lib/uuid` values every MBQL 5 clause needs. Both workflows live in the `mbql` skill. ## Specialized skills (load on demand) -This core file is enough for any single-command task. Load the relevant skill **proactively** when intent matches — don't wing an MBQL body, a transform body, or the git-sync workflow from this overview alone. Load each via `mb skills get `. 
+This core file is enough for any single-command task. Load the relevant skill **proactively** when intent matches — don't wing an MBQL body, a transform body, or the git-sync workflow from this overview alone. Load via `mb skills get `. + +**Start here for anything bigger than one command.** If the user wants an outcome rather than a single verb — "make sense of my data", "build a data model", "go from raw data to a dashboard", "be my data analyst", "set up analytics for X", "answer questions about my data" — load `robot-data-engineer` first and let it route. The rest of this list is the toolbox it routes into. +- **`robot-data-engineer`** — the front-door router for the whole journey (raw data → clean tables → reusable definitions → dashboards or written answers) for a non-technical user. Detects where the user is, sets up auth and autonomy once, and routes to `data-transformation` / `semantic-layer` / `visualization` / `data-analysis`. Load this when the user describes a goal, not a step. - **`mbql`** — authoring or fixing any MBQL query body: `mb query`, a card `dataset_query`, a transform `source.query`, a measure/segment `definition`, "aggregate and group by", reading `--dry-run` errors. The query-body reference. - **`viz`** — choosing a card's `display` and authoring `visualization_settings`: "make it a bar chart", "set the pie dimension/metric", "format this column as currency", "the card renders as a table instead of a chart". The presentation counterpart to `mbql`. - **`transform`** — "create a transform", "run a transform", authoring transform body JSON, run inspection. +- **`data-transformation`** — the higher-level workflow: turning a raw, normalized source database into a small set of clean, wide, analysis-ready tables for a non-technical user — "clean up", "flatten", "denormalize", "make sense of this database", "build analysis-ready tables". Wraps `transform` (the mechanics) with the investigate → propose → build flow. 
+- **`semantic-layer`** — turning clean tables into reusable definitions: "make this filter reusable", "define active customers / net revenue / MRR officially", "create a segment / measure / metric", "so everyone uses the same definition". Builds on `mbql` (the definition bodies) and `transform` (widen a table first when a definition needs more than one). - **`git-sync`** — "import the latest changes", "export to git", "git sync", "dirty check", "stash before pulling". If a task spans more than one, load each. Specialized skills assume the conventions above and won't repeat them. `mb skills list` enumerates everything on the installed version. ## Don't -- **Don't run `mb auth login` for the user** — authentication is theirs (see §Auth). - Don't paste credentials or warehouse passwords in chat. Have the user run the storing command. -- Don't put `--profile` before the verb chain — the CLI parses it as a subcommand and errors out. -- Don't omit `--wait` on `transform run` / `git-sync import` for interactive flows; the next step will race the operation. -- Don't drop a JSON-envelope verb's output raw into another flag. Extract with `--json | jq -r '.'`. -- Don't add a third-party HTTP library or shell into `curl` against `/api/...` when a `mb ` exists — that bypasses retries, schema validation, and credential redaction. +- Don't shell into `curl` against `/api/...` (or add an HTTP library) when a `mb ` exists — that bypasses retries, schema validation, and credential redaction. diff --git a/skill-data/data-analysis/SKILL.md b/skill-data/data-analysis/SKILL.md new file mode 100644 index 0000000..abaffce --- /dev/null +++ b/skill-data/data-analysis/SKILL.md @@ -0,0 +1,65 @@ +--- +name: data-analysis +description: Answer real questions from clean, analysis-ready tables and hand back a plain-language report - an answer-finding task, not chart-building. 
Read the tables, turn the user's question into queries, run them on the live instance, sanity-check the numbers, write up findings the user can trust. Works over already-clean (wide, human-readable) data - survey/registration answers, event signups, customer lists, anything where the data holds the answer. Use when someone wants to "answer questions about my data", "report on who registered / signed up / responded", "what did people say", "analyze X", "explore this data", or "build me a report". For a non-technical user who knows their domain. Needs charts/dashboards? Use `visualization`. Tables still raw? Use `data-transformation` first. +allowed-tools: Read, Write, Edit, Bash, AskUserQuestion +--- + +# Data Analysis + +> **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: audience is a non-technical user, so no database jargon (skip "normalize"/"grain"; ERD/foreign key are fine; explain "wide"/"long" the first time you use them). Ask before showing PII row-by-row (names, emails, phones) — default to aggregates. When asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL. Honor the autonomy mode the user picked. Full text and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. + +The user has a question and clean data that already holds the answer. Your job: find the answer, check it's right, and hand it back in plain language. You're an analyst, not a dashboard builder — the deliverable is a **trustworthy written answer**, optionally backed by a saved question they can re-open. + +This skill assumes the tables are already clean (wide, human-readable). If they're raw and normalized — lots of `*_field`/`*_choice` lookups, coded columns, JSON blobs — stop and route to `data-transformation` first; don't analyze on top of a mess. 
+ +--- + +## The loop + +For each question the user asks: + +1. **Find where the answer lives.** List tables (`mb table list`, `mb db schema-tables `). Read the columns (`mb table fields `). Clean datasets often ship the same facts two ways — a **wide** table (one row per thing, easy to read) and a **long** table (one row per attribute, easy to aggregate over many-valued answers). Pick the one that fits the question: per-person facts → wide; "which option was most popular" across a multi-select → long. + +2. **Turn the question into a query.** Write it, run it (`mb query`). Start small — a `count(*)` and a couple of sample rows to confirm you're pointed at the right table and the columns mean what you think. Then write the real query. + +3. **Sanity-check before you believe it.** A number with no cross-check is a guess. Confirm row counts against a total you trust, watch for nulls/blanks inflating or deflating a percentage, and re-read the column you grouped on — a `type/Category` column with "confirmed"/"cancelled" means your "how many registered" answer depends on which statuses you counted. State the denominator. + +4. **Report in plain language.** Lead with the answer, then how you got it. Numbers get context ("9 of 10 confirmed"), not bare figures. For free-text answers, quote a few real responses rather than only counting them — the words are the value. + +--- + +## What to ask the user up front + +Don't over-interrogate, but settle the things that change the answer: + +- **Scope.** All-time or a window? Everyone, or only confirmed/active? A "how many registered" with no status filter and a "how many _confirmed_" are different numbers — pick the one they mean, and say which you used. +- **Cut.** Do they want the headline number, or the number broken down (by role, by company, by version)? A breakdown is usually one `GROUP BY` away and far more useful. +- **Form of the answer.** A number in chat? A short written digest? 
A saved question they can re-open and refilter? If they want something durable or visual, that's the `visualization` skill — hand off. + +When genuinely unsure which interpretation they mean, ask — never silently pick one and present it as the answer. + +--- + +## Survey / registration data — the common shape + +A lot of "analyze who registered / what did people say" work lands on event or survey data, which has a recognizable shape worth calling out: + +- A **per-registrant wide table** — name, company, role, status, plus one column per single-answer question. Use it for "who registered", rosters, breakdowns by role/version/company, and any per-person filter. +- A **long answers table** — one row per (registrant, question, answer). Use it for **multi-select** questions (one person picks several options, so they can't flatten into one wide column) and for "which option was chosen most". Group by the question text, then by the answer value. +- **Question definitions** — the catalog of what was asked, the answer choices, and whether each question is free-text, single-select, or multi-select. Read this first to know which questions exist and how each is typed before you start counting. + +Three report families cover most asks: + +1. **Roster** — who registered, with the facts that matter (company, role, status). A filtered, ordered read of the wide table. +2. **Distribution** — how the group splits on a single-select (role, version, customer-or-not). A `GROUP BY` with counts; the user-facing answer is "X% picked A, Y% picked B". +3. **Open-ended digest** — what people said in free-text ("what do you want to learn / teach / discuss"). Small N usually — list the actual answers, don't just count them; the responses are the point. + +--- + +## Don't + +- **Don't analyze raw, un-cleaned tables.** If the data is normalized/coded/JSON, route to `data-transformation` first and analyze the clean output. +- **Don't report a number you didn't sanity-check.** No denominator, no null-check → no answer. 
+- **Don't silently pick a scope.** "Registered" vs "confirmed", all-time vs window — state which you used, or ask. +- **Don't build charts/dashboards here.** A written answer (and maybe one saved question) is the deliverable; if they want it visual, that's `visualization`. +- **Don't only count free-text.** Quote the real responses — the words carry the insight a count throws away. diff --git a/skill-data/data-transformation/SKILL.md b/skill-data/data-transformation/SKILL.md new file mode 100644 index 0000000..84f6a5c --- /dev/null +++ b/skill-data/data-transformation/SKILL.md @@ -0,0 +1,200 @@ +--- +name: data-transformation +description: Turn a raw, normalized source database into a small set of clean, analysis-ready tables. Claude investigates the source, works out the real-world "things" the data is about (even when each one is scattered across several tables), decodes coded/JSON/translated values into readable text, and builds one wide, denormalized table per thing as Metabase transforms. Designed for a non-technical user who knows their domain. Use whenever someone wants to "clean up", "flatten", "denormalize", "make sense of", or "build analysis-ready tables from" a raw database. This is the strategy skill for modeling a whole database into a set of clean tables; for authoring or running one individual transform (body shape, flags, run inspection), use the `transform` skill instead. +allowed-tools: Read, Write, Edit, Bash, AskUserQuestion, EnterPlanMode, ExitPlanMode +--- + +# Data Transformation + +> **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: ask before showing PII row-by-row (names, emails, phones) — default to aggregates; when asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL; honor the autonomy mode the user picked. The jargon rules are spelled out in detail below (**Who you're talking to**). 
Full contract and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. + +Your job: take a raw source database — usually normalized, often synced from some SaaS tool by a connector like Fivetran, Airbyte, or Stitch — and produce a **small set of wide, clean, analysis-ready tables**, one per real-world _thing_ the data is about, built as Metabase **transforms** the user can inspect. + +Drive everything through the `mb` CLI. Load the skills you'll need: + +```bash +mb skills get core # auth, profiles, db/table/field inspection, query +mb skills get mbql # if you build transform queries in MBQL +mb skills get transform # creating/running transforms, run inspection +``` + +Users authenticate. You pick the profile per `core`'s **Auth & profiles** and pass `--profile ` to every command. That profile's `url` is the instance's base URL. Browser links below are built from it, ensuring the links are consistent with your CLI usage. + +If you are making transforms, use the transform skill. + +--- + +## Who you're talking to + +A **non-technical user who knows their domain well** — they understand the business (events, customers, invoices, etc.) but not databases. + +- **No modeling jargon.** Skip warehouse vocabulary — grain, fact/dimension table, wide/long tables, normalize, surrogate key, entity, materialize — prefer plain phrasing: "one row per \_\_\_", "what it tells you", "links up with", "how full a column is", "the kinds of things in here". **But don't overdo it:** basic relational terms are fine — table, column, ERD, schema, key, foreign key (cardinality too, though "one-to-many" usually lands better). **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're not database jargon. +- **Don't lean on raw SQL to communicate.** They may follow a simple `SELECT`, but don't explain work via SQL or ask them to read/write it. 
+- Group what you show by **the question a column answers**, never by which source table it came from. +- Be a **helpful assistant, not an engineer reporting status.** Elide machinery; ask sharp questions that matter. +- Your user may say "go" and come back later. **If you ever ask the user a question, wait for their answer.** + +--- + +## Two kinds of decisions + +Sort every choice into one of these. + +**Hard rules — absolutes, never ask:** + +1. Never flatten multi-valued fields into opaque blobs (e.g. three options squished: `"email | phone | text"`). It destroys filterability (the whole point). +2. Never use jargon with the user. Explain by domain and telos. +3. Always surface **real data you're about to leave out** proactively, ranked by how much is extant. +4. Never guess what a column means from its name alone. Confirm against actual values, and interpret them in context: the table the field belongs to and the relevant domain (e.g., a status on orders ≠ status on subscriptions). +5. Never silently drop a whole _thing_. Dropping a column is routine; dropping a whole kind-of-thing (e.g. "suppliers") must be surfaced and confirmed. +6. Never drop columns that link things together. Every table keeps its own id **and** the ids tying it to other tables — alongside the readable labels you copy in, not instead of. The label is for reading; the id is for joining. You're building tables about _related_ things, so they **will** be combined ("sales per region", "messages per customer") — dropped ids make that quietly impossible and the user will regret it. Keep the ids; don't force the user to stare at them. +7. Never bake a non-obvious business rule into a table without confirming it in plain terms. When a transform encodes a judgment the user would have an opinion on — how money nets, which row is the "current" one, what "active" means — say it back in one plain sentence and get a yes/no first. You know only the columns; they know the business.
Wrong rules hide insidiously in clean-looking tables. ("I'm treating each person's most recent sign-up as their current one — right?") +8. Never sneak sensitive personal data through. Flag it on sight — addresses, phone numbers, emails, IPs, financial, etc. — and ask the user how to handle it (the prudential call below). Always surface, never silently expose it in a table others will browse. +9. Never overwrite existing tables or other transforms' outputs. Before building, check the target name is unused (`mb transform list`, `mb table list`); if it's in use, stop and surface it — building over it silently destroys their data. Reuse names only for updating _your own_ transform (`transform update`), never for clobbering another. + +**Prudential calls — contextual, multiple good answers, hinge on domain knowledge you lack. State a lean, then let the user decide.** The recurring ones: + +- **Multi-valued attribute** (one response → many options; one order → many line items): keep it filterable! Structured columns for predefined lists, or simple join tables, never opaque text. Structure is the user's call. Lean: easiest filtering, probably flat. +- **Layering**: default **flat** — one self-contained table per thing, no hidden intermediate tables. Suggest a shared cleaned-up base table only for DRY, avoiding copying complex logic across many transforms. Even then, ask. +- **Out-of-scope things**: surface every domain-model you find and ask in/out, rather than inferring scope from what they happened to mention. +- **A repeating thing vs. the events it takes part in**: one table can mix a _stable_ thing (a customer, a company) with _repeating_ events (each order, each visit), copying the stable details onto every event row. If that thing genuinely recurs — same customer on many rows — consider a one-row-per-thing table too, linked by id, so "how many distinct X" and the per-X details have clean homes. 
Lean: split when recurrence is real, but one table when each appears once. (Phase 1's one-to-one / one-to-many check, step 3, already tells you which.) +- **Handling sensitive data** (addresses, emails, phones, IPs, financial details): once you've flagged it (rule 8), _how_ to carry it is the user's choice — keep as-is, mask (partial redaction), or drop. Lean: keep what is needed, mask the rest, drop the useless. + +Phrase a prudential call as a lean plus a nod: + +> "I'd keep these as one simple table rather than splitting into behind-the-scenes pieces — easier to look through. Good?" + +--- + +## The process + +### Phase 0 — Get Oriented + +**Pin down where the data lives — ask before you hunt.** A table or schema name the user mentions tells you _what_ but not _where_: an instance can hold several databases, each with several schemas. Rather than listing them all to find it, just ask — "Which database is this in, and the schema if you know it? No worries if you're not sure, I can find it." A confident answer short-circuits a lot of blind searching; "not sure" costs nothing and you fall back to locating it yourself. If you've genuinely looked and still can't find a table the user is sure is there, don't keep digging. One possible reason is that Metabase hasn't picked up that database's latest schema yet — gently raise it and ask whether the data's been synced recently, and let the user run the sync from Metabase if it's needed. + +As soon as you know which database and schema you're in: + +- **Show the user the map.** Open the instance's schema map for that schema so they can follow along: `<url>/data-studio/schema-viewer?database-id=<database-id>&schema=<schema>`. Open it in their browser if you can (e.g. `open` / `xdg-open`); else paste the URL. Don't skip this. +- **Ask for a head start.** "Do you have a picture or file showing how your data fits together, like an ERD?" If yes, read it — it shortcuts the next steps.
+- **Ask for their conventions.** "Is there already cleaned-up data, or a past project, that shows how your team likes this done?" If yes, inspect it: it tells you their naming, their idea of "clean," and existing tables worth linking to. + +### Phase 1 — Investigate (in plan mode, if they choose) + +Orientation done, you're about to go heads-down. First, offer two ways to work: + +> Two ways I can take it from here: +> +> - **I dig through it all and bring you a complete plan** to approve before I build anything — quieter; you won't hear much until it's ready. +> - **We work it out together** — I share what I find and we make the calls as we go. + +First path: **enter plan mode** (`EnterPlanMode`). Everything up to the agreed table list — investigate, present, prudential calls, naming (Phases 1–3) — happens inside it, read-only; you exit once, at the approval gate before building (Phase 4). Second path: skip it, shape it conversationally through the same phases. Either way, don't build until the design is settled and user-approved. + +Plan mode is a long quiet stretch — they said "go" and walked off. So whenever you surface — a question now, the plan at the end — **carry your own context**: recap what it rests on right before you ask, never a back-reference to something said while they were away (the router's contract spells this out). + +Then dig in. Don't narrate this — a single "Let me take a look at what's in here — one minute" is enough. Keep it cheap: never pull whole-warehouse rollups (they blow up); use compact column listings, `LIMIT`/sample queries, and `GROUP BY count(*)`. + +1. **Map the tables.** List them; pull each one's column names and types; note its own id. +2. **Find the decode tables.** Normalized SaaS data hides meaning in lookups — `*_field`, `*_field_choice`, `*_question`, `*_choice`, `*_type`. A column like `doodad_4471` is meaningless until you join the lookup and find it's _"Preferred vehicular transport"_. 
Build that code → label map yourself by joining the lookups — never hand the user a coded column and ask what it means — before showing them anything. +3. **Prove the connections — don't trust declared keys.** Synced databases usually have none. If that's the case, ask the user if they have an ERD or relationship information (screenshot, JSON, documentation, etc.). For each `<thing>_id`, guess it points at `<thing>`, then check what fraction of values actually match the target's id: high = real link, low = decoy, discard. Note one-to-one vs one-to-many. **Also look outward** — does a thing you're about to build already exist as clean data elsewhere in the instance (an existing customers table your people match, a product list)? If so, plan to _link_ to it, not duplicate it. +4. **Pin down "one row per what."** Count rows; check the id is unique; figure out what a single row is. **Watch for lies:** a stale count column, or a table that looks like "all of X" but is a filtered subset. +5. **Reconcile across related tables.** Do child rows all link to a parent? Orphans? Is one table a trimmed snapshot while another keeps everything? These mismatches matter and the user can't see them — you must. +6. **Profile the values.** List distinct values for coded/low-variety columns; check how full (% non-empty) any column you might drop is; spot multi-valued JSON fields. Profile with the cleaning checklist (end of file) in mind — surface the quality smells you hit, don't silently fix them. +7. **Cluster into things.** Group tables and columns into the real-world things they describe — a thing may span several tables (one _customer_ across a main table + a loyalty table + custom-profile columns). Decide "one row per \_\_\_" for each and gather its attributes, decoded. Watch for a table that secretly mixes _two_ things — a stable thing plus its repeating events; that's the split in the prudential calls above.
+ +**Then, still quietly, sketch the design space.** Once the things and how they connect are pinned, brainstorm the range of questions this data could answer — finance views, leaderboards, breakdowns. **Don't show it to the user or build any of it.** It only pressure-tests your design: would a reasonable pivot to a nearby question force a rewrite? When keeping a column or finer grain _cheaply_ preserves that flexibility, keep it. Serve the user's stated concern — but don't scope so tightly that the next question means starting over. + +### Phase 2 — Present what you found (plain language) + +Three things, in order: + +**(a) The things, in plain terms.** One short blurb each. E.g. in an online store: + +> **Customers** — one row per customer. Who they are (name, company, location), how they've been in touch, what they've spent, whether they're active or churned. + +**(b) The full inventory — including what you'd leave out.** Never infer scope silently: + +> I found 6 kinds of things: **Customers, Orders, Products, Suppliers, Shipments, Returns.** I'd build the first four. **Shipments** and **Returns** also have real data — want those in, or leave them? + +**(c) What would be set aside — proactively, ranked, two buckets:** + +> Nothing important is lost. A few things set aside: +> • **Real data** — gift-message text (6 of 10 orders), delivery instructions (most), preferred carrier. Minor, but real — want any kept? +> • **Safe to drop** — duplicate product names in other languages, internal bookkeeping columns. No real loss. + +If you spotted existing clean data to link to (step 3), raise it here too — and **always run a suspected match past the user before wiring it; never graft onto their existing data silently.** Then ask your prudential questions, one at a time, each a lean-plus-nod. + +### Phase 3 — Iterate + +Cheap, because nothing's built. Adjust the set of things, what's kept, and the shape of any multi-valued pieces until the user's happy. 
**Agree on what each table will be called** — propose a clear name for each (matching any naming pattern you found in their existing data, Phase 0) and let them adjust. Confirm each name is free — not already an existing table or another transform's output (rule 9) — so building can't overwrite anyone's data. Settle the names before building: the name you agree on is the one you build and keep. Re-confirm the final picture in one short recap. **In plan mode, that recap _is_ your exit:** present it as the plan and call `ExitPlanMode` — approval here is the single go-ahead to build. (Iterating together? The recap is just your check before building.) + +### Phase 4 — Build, check, hand back + +Design settled — now you build, the first step that writes; plan mode, if you used it, is behind you. Build one wide transform per agreed thing — and build for how it'll be judged: aim for output that's readable on sight, not just one that runs clean. Each table: + +- **Denormalized, but the link stays.** Copy in related context so casual reading needs no lookups (a product's name and price on the orders table) — **and keep the linking id beside it** (the product's id too, per rule 6). Use the same id name everywhere a thing appears. +- **Decoded**: codes and JSON become readable text; bookkeeping columns and soft-deleted rows are gone (filter the source's soft-delete flag — Fivetran's `_fivetran_deleted`, Airbyte's `_ab_cdc_deleted_at`, or a plain `deleted_at`/`is_deleted` — so tombstones never reach clean data; not every source has one). +- **Clean, plain column names**, consistent across tables. +- **Multi-valued pieces** in the agreed filterable structure (rule 1). +- **Keep the detail; don't pre-summarize it away.** Build the detailed rows (one per order, one per payment), not pre-computed totals. A convenience count is fine _beside_ the rows, never _instead of_ them — a frozen total only ever answers the one question it was summed for. 
+ +Then make the links real, not just implied: + +- **Wire foreign keys between your tables.** Mark each linking id as a foreign key pointing at the id it references (`mb field update` — set the column's type to foreign-key and its target). Now Metabase itself knows the tables connect and can traverse them. +- **Graft onto existing clean data** the user approved (found in Phase 1 step 3, approved in Phase 2): point the linking id at the existing table's id the same way. Link, don't duplicate. +- **Write down what you learned.** You decoded every column's real meaning while investigating — save it: set a short description on each table and its non-obvious columns (`mb table update` / `mb field update`). The cleaned data then explains itself inside Metabase — in search, in the Question editor, to Metabot — instead of the knowledge living only in this chat. + +When you start refining a built transform _with_ the user, open its inspector for them so you're looking at the same thing — `<url>/data-studio/transforms/<transform-id>/inspect` — opening it in their browser if you can, else pasting the URL. Iterate with `transform update`, never delete-and-recreate. + +**Check the output before handing back — the user can't.** Two passes, in order. + +**Pass 1 — Correctness (did it run right).** After each transform runs, run quick ad-hoc tests against what Phase 1 led you to expect: row counts in the right ballpark, decoded columns readable (no stray codes), linking ids that resolve to the other tables, no column unexpectedly all-null or blown up in count. Treat surprises as bugs to chase, not noise. A table that can't combine with the others — a dropped id, or the same id named two ways — is a silent failure; catch it here. + +**Pass 2 — Fitness (is it nice to use).** Correct isn't the bar; _usable_ is. `SELECT * FROM <table> LIMIT 20` and read every column left to right as if you'd never seen the source: would a non-technical person find each one readable?
Smells that say not-yet, even though nothing errored: + +- a multi-valued column still a raw JSON/array blob or `["Email","SMS"]` text — rule 1 never actually got resolved; +- decoded answers still carrying raw ids with no readable label, or one cryptic column per code; +- a code sitting beside its own label when only the label is wanted, or two columns saying the same thing; +- a "decoded" column that reads as a slug (`pref_contact_mthd`) rather than plain language. + +A readability smell is a bug: fix it (`transform update`), re-run, look again. When the fix is really a shape choice (how a multi-select is structured) or a keep/drop call, that's the user's — surface it, don't silently decide. + +Then report plainly: + +> Done. Three tables: +> • **Customers** — transform #41 +> • **Orders** — transform #42 +> • **Products** — transform #43 +> +> How they connect: each **Order** belongs to a **Customer**; each **Order** lists one or more **Products**. + +End on that connection map: it's what the user reads to trust the result, and what lets whatever they build next join the tables on the right ids instead of guessing how they relate. + +--- + +## A worked decode example (for your reference, not the user's) + +The shape recurs across SaaS exports, whatever the domain. A coded column — say `c_4471` on a responses table — means nothing alone. A lookup (`*_question`, `*_field`, `*_choice`) has a row where `attribute = 'c_4471'` and `name = "Preferred contact method"`. Single-select answers are often already `{"id":…, "value":"Email"}` — use `value`. Multi-select answers are arrays like `[{"value":"Email"},{"value":"SMS"}]` — the multi-valued case: keep each value filterable, don't concatenate. + +Always decode _before_ presenting, so the user sees "Preferred contact method", never `c_4471`. 
Three cautions: + +- **Pull the readable name from the lookup, don't type it in.** The label (and any question text) should come _from_ the lookup's `name`, sourced in the query — not pasted as a literal. A hard-typed label goes wrong the moment the source changes. +- **Codes are usually specific to today's data.** `c_4471` exists only for _this_ form or instance, so one-column-per-code is tied to the data as it stands — a new form or instance won't line up. When that's unavoidable, say so on hand-back ("reflects the current form; new questions need a refresh"), and with many such codes prefer the companion-table shape (one row per answer, question text from the lookup): nothing hard-typed, and adding a question is a smaller change. +- **Normalize encodings once.** Turn raw representations clean in the table itself, so nothing downstream re-derives them: signed amounts → clear positive numbers by kind, 0/1 → true/false, timestamps → one consistent timezone, text → trimmed and case-consistent, and junk placeholders (`"NULL"`, `"N/A"`, `"-"`, empty string) → real null. + +--- + +## Cleaning checklist (for your reference, not the user's) + +A scan-list, not a pipeline — and the governing rule is **surface what you find, don't silently "fix" it.** Silently dropping outliers, imputing blanks, or merging "duplicates" can erase the exact signal the domain expert cares about. Safe standardizations you just apply; everything else is a prudential call — flag it with a lean and let them decide. + +**Just apply** (safe, universal — already your default): consistent timestamps/timezone; trimmed, case-consistent text; junk placeholders (`"NULL"`, `"N/A"`, `"-"`, `""`) → real null; sane numeric precision; booleans from varied forms (Y/N, 1/0). + +**Notice and surface** (the answer depends on their business): + +- **Duplicates** — exact, or by business rule ("same email = same person"). Never merge silently. 
+- **Validation smells** — out-of-range numbers, malformed emails/phones/ids, `end_date < start_date`. +- **Outliers** — values that read as data-entry errors. Flag, don't drop. +- **Missing data** — random vs. systematic? Surface the pattern; never silently impute or default. +- **Free text / mixed encodings** — handle the safe parts, flag the rest. + +Already covered by the rules above, listed so they stay on your radar: structural reshaping (decode/JSON/multi-value), orphans & key validity (Phase 1 step 5 + the post-run check), filtering soft-deletes & dropping bookkeeping columns (Phase 4's **Decoded** step), and recording meanings (the descriptions step). diff --git a/skill-data/document/SKILL.md b/skill-data/document/SKILL.md index c3b9862..381fab9 100644 --- a/skill-data/document/SKILL.md +++ b/skill-data/document/SKILL.md @@ -145,10 +145,10 @@ Each entry in `cards` needs at least `{name, dataset_query, display, visualizati `update` replaces the whole `document` body, so the safe loop is **read → edit → write**. A fetched body already carries `_id`s on its id-bearing nodes, so preserve them — only mint new ones for id-bearing nodes you add: ```bash -mb document get --full --profile --json | jq '.document' > /tmp/body.json -# edit /tmp/body.json (add nodes — give each new id-bearing node a fresh `mb uuid` _id) … -jq -n --slurpfile d /tmp/body.json '{document: $d[0]}' > /tmp/patch.json -mb document update --file /tmp/patch.json --profile --json +mb document get --full --profile --json | jq '.document' > ./.scratch/body.json +# edit ./.scratch/body.json (add nodes — give each new id-bearing node a fresh `mb uuid` _id) … +jq -n --slurpfile d ./.scratch/body.json '{document: $d[0]}' > ./.scratch/patch.json +mb document update --file ./.scratch/patch.json --profile --json ``` Don't hand-merge a partial node tree into a live document — pull the current `document`, mutate the array, and PUT the whole thing back.
To rename without touching the body, patch only `name`: `mb document update --body '{"name":"New title"}'`. diff --git a/skill-data/mbql/SKILL.md b/skill-data/mbql/SKILL.md index 3518d68..1818d9f 100644 --- a/skill-data/mbql/SKILL.md +++ b/skill-data/mbql/SKILL.md @@ -1,6 +1,6 @@ --- name: mbql -description: Author Metabase MBQL 5 query bodies for the `mb` CLI — the only hand-authorable query format. Covers the JSON shape (lib/type mbql/query, flat stages, numeric ids), the "options object always second" clause rule, when lib/uuid is needed (it's optional — only to reference a clause), the print-schema → dry-run → run validation loop, where MBQL 5 is consumed (mb query, card dataset_query, transform source.query, measure/segment definition), the flat-vs-legacy-envelope footgun, joins and FK traversal, multi-stage pipelines, and naming aggregation output columns. Load whenever building or fixing an MBQL query by hand — "write an MBQL query", "create a card from MBQL", "the dataset_query is wrong", "fix the validation errors", "aggregate and group by", "order by the count", "join two tables", "month-over-month", or any `--dry-run` / `mb query` work. +description: Author Metabase MBQL 5 query bodies for the `mb` CLI - the only hand-authorable query format. Covers the JSON shape (lib/type mbql/query, flat numeric-id stages), the options-object-always-second clause rule, when lib/uuid is needed (optional - only to reference a clause), the print-schema/dry-run/run loop, where MBQL 5 is consumed (mb query, card dataset_query, transform source.query, measure/segment definition), the flat-vs-legacy-envelope footgun, joins and FK traversal, multi-stage pipelines, naming aggregation columns. Load when building or fixing an MBQL query by hand - "write an MBQL query", "create a card from MBQL", "the dataset_query is wrong", "fix the validation errors", "aggregate and group by", "join two tables", "month-over-month", or any `--dry-run` / `mb query` work. 
allowed-tools: Read, Write, Edit, Bash, AskUserQuestion --- @@ -8,13 +8,13 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion MBQL 5 is the **only query format you can author by hand** with confidence — it has a bundled JSON Schema, so the CLI pre-flight-validates it before sending. Legacy MBQL 4 and native SQL are accepted but **not** schema-validated (see "Other formats" below). -Prefer MBQL over native SQL: it's portable across warehouse engines and the CLI pre-flight-validates it. Try it first, but don't force it — fall back to native SQL when MBQL can't express what you need, or when an MBQL body keeps failing server-side and you can't resolve it. +Prefer MBQL over native SQL: portable across warehouse engines and pre-flight-validated. Try it first; fall back to native SQL when MBQL can't express what you need, or when an MBQL body keeps failing server-side and you can't resolve it. -The general flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). +General flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). ## The shape -A query is a flat object — `lib/type`, a numeric `database` id, and an ordered `stages` array. No recursive `source-query` nesting; multi-step queries are sibling stages. +A flat object — `lib/type`, a numeric `database` id, and an ordered `stages` array. No recursive `source-query` nesting; multi-step queries are sibling stages. ```json { @@ -31,7 +31,7 @@ A query is a flat object — `lib/type`, a numeric `database` id, and an ordered } ``` -- **Numeric ids only.** `database`, `source-table`, and field ids are integers from `mb database list` / `mb table get --include fields`. (The portable YAML representation under git-sync uses _names_ like `[Sample Database, PUBLIC, ORDERS]`; the CLI's `/api/dataset` form uses numeric ids — don't mix them.) 
+- **Numeric ids only.** `database`, `source-table`, and field ids are integers from `mb database list` / `mb table get --include fields`. (Git-sync YAML uses _names_ like `[Sample Database, PUBLIC, ORDERS]`; the `/api/dataset` form uses numeric ids — don't mix them.) - **First stage** carries `source-table` (a table id) or `source-card` (a saved card). Later stages omit both and read the previous stage's output columns by name. - `source-card` references a saved card by its **numeric id** (from `mb card list`), not its string entity id; downstream fields are referenced by column name (string), not a field id. @@ -53,9 +53,9 @@ The same `[op, {options}, …]` rule holds for `aggregation`, `breakout` (a list ## UUIDs: optional — mint only to reference a clause -`lib/uuid` is **optional — leave it out whenever you can.** Omit it and the server generates a unique one for every clause as the query comes in; an empty options object `{}` is the normal, preferred case. Don't add a UUID per clause: it's needless work, and the more UUIDs you hand-manage the easier it is to trip the server's "all `lib/uuid`s must be unique" check — a duplicated UUID passes pre-flight, then fails server-side. +`lib/uuid` is **optional — leave it out whenever you can.** Omit it and the server generates a unique one for every clause; an empty options object `{}` is the normal case. The more UUIDs you hand-manage the easier it is to trip the server's "all `lib/uuid`s must be unique" check — a duplicated UUID passes pre-flight, then fails server-side. -Set an explicit `lib/uuid` only when you must **reference a clause from elsewhere in the query** — the one thing the server can't do for you, since you have to know the value to point at. The case that needs it: **ordering by (or otherwise reusing) an aggregation.** `["aggregation", {…}, ""]`'s third arg is the **string** `lib/uuid` of the target aggregation, so give that aggregation an explicit `lib/uuid` and point the ref at the same string. 
A numeric position fails with `must be the target aggregation's lib/uuid (string), not a numeric position`. +Set an explicit `lib/uuid` only when you must **reference a clause from elsewhere in the query** — you have to know the value to point at. The case that needs it: **ordering by (or otherwise reusing) an aggregation.** `["aggregation", {…}, ""]`'s third arg is the **string** `lib/uuid` of the target aggregation, so give that aggregation an explicit `lib/uuid` and point the ref at the same string. A numeric position fails with `must be the target aggregation's lib/uuid (string), not a numeric position`. ```json "aggregation": [["count", { "lib/uuid": "AGG_UUID" }]], @@ -64,7 +64,7 @@ Set an explicit `lib/uuid` only when you must **reference a clause from elsewher (`AGG_UUID` is both the aggregation's own `lib/uuid` and the string the ref points at — one value, by string equality. Every other clause omits its UUID. Expression refs work the same way but key off the expression's `lib/expression-name` string, so expressions rarely need an explicit `lib/uuid`.) -On the rare occasion you do need one, **always mint it with `mb uuid` — never write, guess, or copy a UUID yourself.** A hand-authored value is either rejected pre-flight as not-a-v4 (`"a1"`, `"uuid-1"`, `"agg-uuid-001"` → `must be a UUID v4 (RFC 4122) — run \`mb uuid\``) or, if it happens to look valid, risks colliding with another clause. Only `mb uuid`gives you genuine, unique v4s — mint just the few you reference (this also covers native template-tag ids and any other`format: "uuid"` slot): +When you do need one, **always mint it with `mb uuid` — never write, guess, or copy a UUID yourself.** A hand-authored value is rejected pre-flight as not-a-v4 (`"a1"`, `"uuid-1"`, `"agg-uuid-001"` → `must be a UUID v4 (RFC 4122) — run \`mb uuid\``), or if it looks valid risks colliding with another clause. 
Only `mb uuid` gives genuine, unique v4s — mint just the few you reference (also covers native template-tag ids and any other `format: "uuid"` slot): ```bash mb uuid --count 2 --json # mint only the clauses you actually reference @@ -75,7 +75,7 @@ mb uuid --count 2 --json # mint only the clauses you actually reference `mb query` is the canonical authoring surface. Three modes: ```bash -mb query --print-schema --profile > /tmp/mbql-schema.json # 1. fetch the schema +mb query --print-schema --profile > ./.scratch/mbql-schema.json # 1. fetch the schema mb query --file q.json --dry-run --profile # 2. validate, no network mb query --file q.json --profile --json # 3. validate + run ``` @@ -86,7 +86,7 @@ mb query --file q.json --profile --json # 3. validate + `path` is a JSON Pointer into the body (`/stages/0/aggregation/0`); `message` is the validator error. Exit codes: `0` valid + ran, `2` validation failed / malformed body, `1` server-side error after a valid pre-flight. -**Pre-flight is a lightweight shape check, not the full backend validator.** It checks JSON shape, `lib/uuid` format, and enum values — not operator names, the first-stage source rule, or whether a reference resolves. A clean `--dry-run` is necessary but not sufficient: a body can pass pre-flight and still fail on the server (exit `1`). The Metabase server is the authority — when a run fails, read its error and fix the body. The common ones and what they mean: +**Pre-flight is a lightweight shape check, not the full backend validator.** It checks JSON shape, `lib/uuid` format, and enum values — not operator names, the first-stage source rule, or whether a reference resolves. A clean `--dry-run` is necessary but not sufficient: a body can pass pre-flight and still fail on the server (exit `1`). The server is the authority — when a run fails, read its error and fix the body. Common ones: - `not a known MBQL clause` → a misspelled or unsupported **operator**.
Check the vocabulary in `operators.md` (`mb skills get mbql --full`). - `Initial MBQL stage must have either :source-table or :source-card` → the **first stage** is missing its source (a numeric table or card id); only the first stage takes one, later stages read the previous stage's columns. @@ -95,11 +95,11 @@ mb query --file q.json --profile --json # 3. validate + A successful run emits the compact envelope by default: `data.rows` + slim `data.cols` (`name`, `display_name`, `base_type`, `semantic_type`). Pass `--full` for the raw `/api/dataset` envelope (`results_metadata`, `native_form`, per-column fingerprints/`field_ref`) only when you need that metadata; `--fields data.rows` narrows to rows alone. `mb query` also runs a **native** body — `{database, type:"native", native:{query:"SELECT …"}}` — which skips pre-flight; the quickest way to eyeball warehouse data. -`--skip-validate` bypasses the pre-flight and sends as-is — use only when the bundled schema disagrees with what the server actually accepts (drift / false negative). Mutually exclusive with `--dry-run`. The same flag exists on `card create/update` and `transform create/update`. +`--skip-validate` bypasses pre-flight and sends as-is — use only when the bundled schema disagrees with what the server actually accepts (drift / false negative). Mutually exclusive with `--dry-run`. Same flag exists on `card create/update` and `transform create/update`. ## Where MBQL 5 is consumed -The same body and the same pre-flight apply everywhere a query is embedded. Each pre-flights only when the value is MBQL 5 (`lib/type: "mbql/query"`); legacy shapes skip it; `--skip-validate` bypasses. +The same body and pre-flight apply everywhere a query is embedded. Each pre-flights only when the value is MBQL 5 (`lib/type: "mbql/query"`); legacy shapes skip it; `--skip-validate` bypasses. 
| Command | MBQL 5 lives at | Notes | | --------------------------------------- | ---------------------------------------------- | ------------------------------------------- | @@ -122,16 +122,16 @@ The most common mistake. The legacy MBQL 4 shape `{ "type": "query", "database": } ``` -No `type:"query"` wrapper, no `query:` nesting. If you wrap MBQL 5 inside a legacy envelope the CLI rejects it pre-send with a `ConfigError` (no `--skip-validate` gets it past). If it ever reached the server it would store silently and fail at run time with `Initial MBQL stage must have either :source-table or :source-card`. +No `type:"query"` wrapper, no `query:` nesting. If you wrap MBQL 5 inside a legacy envelope the CLI rejects it pre-send with a `ConfigError` (no `--skip-validate` gets it past). If it reached the server it would store silently and fail at run time with `Initial MBQL stage must have either :source-table or :source-card`. ## Other formats skip pre-flight -Anything that is not `lib/type: "mbql/query"` is sent as-is and normalized server-side: +Anything not `lib/type: "mbql/query"` is sent as-is and normalized server-side: - **Legacy MBQL 4** — `{ "type": "query", "database": N, "query": { "source-table": T, … } }` - **Native SQL** — `{ "type": "native", "database": N, "native": { "query": "SELECT …" } }` -`mb query --file probe.json` runs these directly; `--dry-run` on them returns `{ ok: true, errors: [] }`. Don't author MBQL 4 by hand — if you need a legacy or complex query, build it in the Metabase UI and pull the body with `mb card get --full --json` / `mb transform get --full --json`. +`mb query --file probe.json` runs these directly; `--dry-run` on them returns `{ ok: true, errors: [] }`. Don't author MBQL 4 by hand — build a legacy or complex query in the Metabase UI and pull the body with `mb card get --full --json` / `mb transform get --full --json`. ## Joins and FK traversal @@ -154,7 +154,7 @@ Two ways to read columns from a related table. 
"breakout": [["field", { "join-alias": "Customers" }, 1682]] ``` -The condition's left ref is a column of the stage's own source (`1711` = orders.customer_id); the right ref carries `join-alias` and points at the joined table's key (`1684` = customers.id). Every later reference to a joined column (`1682` = customers.plan) needs that same `join-alias`. Stack multiple objects in `joins` for multiple joins, each with its own `alias`. +Left ref is a column of the stage's own source (`1711` = orders.customer_id); the right ref carries `join-alias` and points at the joined table's key (`1684` = customers.id). Every later reference to a joined column (`1682` = customers.plan) needs that same `join-alias`. Stack multiple objects in `joins`, each with its own `alias`. **Implicit FK join via `source-field`.** For a single-hop FK lookup, skip the join — put the FK column's id in the target field's `source-field` option and Metabase traverses the relationship: @@ -166,7 +166,7 @@ The condition's left ref is a column of the stage's own source (`1711` = orders. ## Multi-stage pipelines -Stages run in order; each reads the **previous stage's output columns** — the breakouts and aggregations it produced — referenced by **string name + `base-type`**, not a numeric field id. Only the first stage takes a `source-table`/`source-card`. The reason to add a stage is to operate on an aggregate (you can't filter or order by an aggregation within the stage that computes it): aggregate, then filter the aggregate, then order + limit. +Stages run in order; each reads the **previous stage's output columns** — the breakouts and aggregations it produced — referenced by **string name + `base-type`**, not a numeric field id. Only the first stage takes a `source-table`/`source-card`. Add a stage to operate on an aggregate (you can't filter or order by an aggregation within the stage that computes it): aggregate, then filter the aggregate, then order + limit. 
```json "stages": [ @@ -197,7 +197,7 @@ Later stages address the first stage's aggregation by the `name` you gave it (`" ## Naming aggregation output columns -Default MBQL 5 aggregations materialize as `count`, `count_where`, `avg`, `avg_2`, `sum`, … — fine for an ad-hoc run, ugly when the output is a transform target table or a card column. Set `name` (becomes the warehouse column name) and `display-name` (the UI header) in the aggregation's options: +Default MBQL 5 aggregations materialize as `count`, `count_where`, `avg`, `avg_2`, `sum`, … — fine for an ad-hoc run, ugly for a transform target table or card column. Set `name` (the warehouse column name) and `display-name` (the UI header) in the aggregation's options: ```json ["count", { "name": "shipments_shipped", "display-name": "Shipments shipped" }] @@ -205,7 +205,7 @@ Default MBQL 5 aggregations materialize as `count`, `count_where`, `avg`, `avg_2 ## Operator reference -The full operator vocabulary — filter operators (`=`, `!=`, `<`, `between`, `contains`, `is-null`, …), aggregation functions (`count`, `sum`, `avg`, `distinct`, `count-where`, `share`, …), expression operators (arithmetic, string, temporal), temporal-bucketing units, and binning strategies — lives in this skill's `references/operators.md`, in the CLI's numeric-id form. Load it on demand rather than dumping the schema: +The full operator vocabulary — filter operators (`=`, `!=`, `<`, `between`, `contains`, `is-null`, …), aggregation functions (`count`, `sum`, `avg`, `distinct`, `count-where`, `share`, …), expression operators (arithmetic, string, temporal), temporal-bucketing units, and binning strategies — lives in this skill's `references/operators.md`, in numeric-id form. 
Load it on demand rather than dumping the schema: ```bash mb skills get mbql --full # appends references/operators.md to this body @@ -217,7 +217,7 @@ mb skills path mbql # → the skill dir; then Read references/operator ## Don't - Don't mint a `lib/uuid` for every clause — they're optional; omit them and the server fills them in. Mint (with `mb uuid`) only the clause you need to reference; never invent, hard-code, or copy a UUID (duplicates are rejected server-side). -- Don't put the options object anywhere but slot 1, and don't use the legacy `["field", id, opts]` order. +- Keep the options object in slot 1 of every clause — `[op, {options}, ...args]`, id last (`["field", {}, 1779]`). The legacy `["field", id, opts]` order (id second) is rejected pre-flight. - Don't wrap an MBQL 5 body in `{type:"query", query:…}` — `dataset_query` / `source.query` / `definition` is the flat `mbql/query`. - Don't author MBQL 4 by hand — build it in the UI and pull it with `… get --full --json`. - Don't skip the `--dry-run` loop on a non-trivial query — it's free and exact. diff --git a/skill-data/robot-data-engineer/SKILL.md b/skill-data/robot-data-engineer/SKILL.md new file mode 100644 index 0000000..1530350 --- /dev/null +++ b/skill-data/robot-data-engineer/SKILL.md @@ -0,0 +1,142 @@ +--- +name: robot-data-engineer +description: The front door for turning a database into something a non-technical person can use - clean tables, reusable definitions, dashboards, and answers - all through the `mb` CLI. A light router - it works out where the user is (raw data? clean tables? ready to chart? just need a question answered?), sets up auth and how hands-on they want to be, then loads the right specialized skill. 
Load when someone wants to "make sense of my data", "build a data model", "go from raw data to a dashboard", "answer questions about my data", "report on who registered / signed up / responded", "analyze X", "be my data analyst / data engineer", "set up analytics for X", or asks for the whole journey rather than one step. +allowed-tools: Read, Write, Edit, Bash, AskUserQuestion +--- + +# Robot Data Engineer + +You're the front door, not the worker. Point the user at the right tools and get out of the way. The work lives in four specialized skills; ask the user directly which one(s) they need right now, set up shared context once, and hand off. The moment you know which skills should be loaded and in which order, load the first and let it drive. + +The three stages (stage 3 splits into two skills, which is how three stages map to four skills): + +1. **Raw data → clean tables** — `data-transformation`. Turns a messy, normalized source database into a small set of wide, clean, analysis-ready tables. +2. **Clean tables → reusable definitions** — `semantic-layer`. Turns those tables into segments (saved filters), measures (saved calculations), and metrics (official numbers) the whole team reuses. +3. **Tables/definitions → human understanding** — Two different skills, depending on what the user needs. + A. Charts and dashboards? `visualization`. Builds the questions and dashboards people look at. + B. Plain-language analysis? `data-analysis`. Given a user's question, it queries the clean data, sanity-checks the results, analyzes them, and hands back a plain-language report. + +Stages 3A and 3B are alternatives, not sequential steps: answering in prose and charting are two different things you can do with clean data; route to whichever the goal calls for. Users describe a goal, not a stage. Map the goal to a stage, confirm, and route. + +In some cases, the user will want to do all of 1-3 sequentially; in other cases, just one or two of the stages. + +--- + +## Setup — do this once, up front + +Settle two things before routing so the child skills don't re-ask: + +1.
**Auth.** Pick the profile per `core`'s **Auth & profiles** section — `mb auth list --json`; one → use it, several → ask which, none → ask the user to `mb auth login` — then carry `--profile <name>` into everything. (Canonical recipe; restated here because the router may run before `core` is loaded.) + +2. **How hands-on they want to be** (the autonomy slider). Ask once, plainly, remember it for the whole session, and tell each child skill the chosen mode so the user isn't asked again: + + > Quick thing — how hands-on do you want to be? + > • **Check with me on everything** — I'll run each step past you first. + > • **Balanced** (default) — I'll decide the obvious stuff and ask only when it matters. + > • **Just go** — I'll do what makes sense and show you the result. + +Two things you always own, regardless of mode and regardless of which child ran: + +- **When genuinely unsure, ask — never assume.** Pass this expectation down. +- **The final hard stop.** Before the user treats anything as done, give a plain-language recap of what now exists and hand them something to open and eyeball. The child skills stop within their own stage; you stop at the end of the journey. + +--- + +## Shared Contract + +This is the single source for the rules every child skill follows. Children carry a one-line summary and point back here; this is the full text. When a child runs directly (loaded without going through this router), it's told to read this section first — so treat it as the contract for the whole family, not just the router. + +**Who you're talking to.** A non-technical user who knows their domain well — they understand the business (events, customers, invoices, whatever it is) but not databases. Talk in their terms. + +**Jargon.** Skip warehouse vocabulary they won't know — grain, fact/dimension table, normalize, denormalize, surrogate key, materialize — and prefer plain phrasing: "one row per \_\_\_", "what it tells you", "links up with", "how full a column is".
But don't overdo it: they work with tables, so basic relational terms are fine — table, column, ERD, schema, key, foreign key, cardinality. **wide / long** are borderline — usable, but explain them the first time ("one row per person, with a column for each answer"). And **Metabase's product terms are encouraged** — Question, Model, Segment, Measure, Metric, Transform — they're the user's tools, not database jargon. + +**PII.** Survey and registration data holds personal information — names, emails, phone numbers, emergency contacts. Before showing it row-by-row (a roster, a sample of rows), ask whether to display, aggregate, or mask. Default to aggregate counts/breakdowns unless the user wants the actual list. + +**Capability limits — know what you can't do.** The `mb` CLI can author and query content, but it isn't the whole Metabase product. When the user asks for something outside its reach — alerts/subscriptions, applying a segment as a dashboard filter, scheduled emails, permissions UI — say so plainly and offer the nearest thing the CLI _can_ do. Don't attempt it, hit a server error, and surface raw SQL or a stack trace; name the limit up front. + +**Permission denied — stop, diagnose, offer a way back.** When a query fails with "permission denied", the one thing you must never do is quietly run a _different_ readable table and present its numbers as the answer (that's how a question about the customers table gets silently answered with a lookalike table from another schema). Instead, in order: + +1. **Stop.** Don't substitute another table and pass it off as the answer. +2. **Surface and diagnose in plain, friendly terms.** Name what was denied and the likely reason. 
The usual three: _right table, wrong login_ — it exists, but this CLI login isn't granted it (common on staging/isolated setups — a configuration thing, not a problem with their data); _right name, wrong copy_ — a readable table of the same or similar name lives in another schema or database; _name slightly off_ — what they called it isn't quite the real table name. For example: "I can't read `analytics.account` — this login doesn't have access to it. That's usually a staging-permissions thing, not a problem with your data." +3. **Offer to search — don't auto-crawl.** Ask first: "Want me to look for a table with a similar name that this login _can_ read?" Only on yes, run `mb search <query>` / `mb table list`, and surface any match as a **confirm question**, never as a substituted answer: "There's `dbt_models.account` I can read — did you mean that one?" +4. **Hand control back.** Don't propose or run a fix you can't reliably execute — no `GRANT` statements, no profile-switching. The recovery is the user's call. + +**Scratch files.** Working files — transform/query/patch JSON bodies, notes — go in `./.scratch` in the current working directory, **never `/tmp`**. It has better permissions, it persists across the session, and the user can open and review it. Run `mkdir -p ./.scratch` if it isn't there yet. + +**Talking to the user.** Habits that are easy to slip on (see also "Questions must carry their own context" below): + +- **Don't reference things they never saw.** If _you_ built a helper table or ran a probe earlier, don't name it as if they were watching — reintroduce it in their terms, or don't mention it. +- **Assume they read only the last ~30 lines.** Don't lean on context from far up the conversation; restate what they need to act on your question. +- **Plain permission requests.** Don't paste a wall of SQL or JSON and ask "run this?". Summarize the action in one sentence — "Want me to add a column linking registrations to accounts?"
— and offer to show the details if they ask. + +**Autonomy slider.** Ask once, up front (the router does this in Setup), then remember it for the whole session — children read the chosen mode, they don't re-ask: + +> Quick thing — how hands-on do you want to be? +> • **Check with me on everything** — I'll run each step past you first. +> • **Balanced** (default) — I'll decide the obvious stuff and ask only when it matters. +> • **Just go** — I'll do what makes sense and show you the result. + +**When genuinely unsure, ask — never assume.** + +**Questions must carry their own context.** The user may not have been reading along — people hit go, step away, and skim the stretches where you think out loud. So whenever you ask for input, the context the question depends on goes _right before it_, not as a back-reference. "Given the mismatch I found earlier, what would you like to do?" forces a scroll-back; lead with a short recap instead: + +> I have a question for you — quick recap so it makes sense: +> +> - I found a mismatch in ... +> - This matters because ... +> - Here's what I was thinking, but I need to check ... +> +> The question. + +Recap only the few points the question turns on — enough to answer cold, not a replay of everything you did. + +**The final hard stop.** Before the user treats anything as done, give a plain-language recap of what now exists and hand them something to open and eyeball. + +--- + +## Work out where they are, then route + +Don't make the user name a _stage_ — but do find out _where their data lives_ before you go looking for it. + +**Ask before you crawl.** If you don't already know which database, schema, or table the user means, ask — one plain question short-circuits a dozen tool calls. The asymmetry: if they name a **database**, ask which **schema**; if they name a **table**, ask which **database** it's in. "If you don't know, no problem — I'll look" is the fallback, not the opening move. 
Only crawl the instance when the user genuinely doesn't know where things are. + +**When you do crawl — the efficient ladder** (cheap, narrowest-first; never pull whole-warehouse rollups): + +- Walk down: `mb db list` → `mb db schemas <db-id>` → `mb db schema-tables <db-id> <schema>` → `mb table list [--db-id]` → `mb table fields <table-id>` / `mb table metadata <table-id>`. +- Have a _name_ to look for rather than a tree to walk? Use `mb search <query> [--models] [--db-id]` instead of crawling. +- Need to know what's actually in a column? `mb field summary <field-id>` (row/distinct counts) and `mb field values <field-id>` (sample values). +- **If a database looks freshly connected, or a table the user expects isn't showing up, offer to sync** — `mb db sync-schema <db-id> --wait` — before concluding the table doesn't exist. + +**Then read the shape to pick a stage.** Are there raw, normalized, SaaS-synced-looking tables (lots of tables, coded columns, `*_field`/`*_choice` lookups)? Or already wide, clean, human-readable ones? Any segments/measures/metrics (`mb segment list`, `mb measure list`, `mb card list`) or dashboards (`mb dashboard list`)?
+ +**Map goal + state to a skill:** + +| What the user wants / what's there | Load | +| -------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- | +| "Clean up / flatten / make sense of" raw, normalized data; no clean tables yet | `data-transformation` | +| Clean tables exist; "make this reusable", "define active customers / revenue / MRR officially", "so everyone uses the same definition" | `semantic-layer` | +| Tables (and maybe definitions) exist; "chart this", "build a dashboard", "show me X over time" | `visualization` | +| Clean tables exist; "answer this question", "who registered", "what did people say", "analyze / report on / summarize X" (wants a written answer, not a chart) | `data-analysis` | +| "Do the whole thing" / "set up analytics for X" from raw data | start at `data-transformation`, then continue down the journey (see below) | + +Load a skill with `mb skills get <name>`. Then **hand off** — the child owns its own flow, asking and stopping within its stage. Don't narrate the child's work or duplicate its steps. + +**If the state and the goal disagree** — they ask for a dashboard but there are only raw tables — say so plainly and offer the earlier stage first: _"There aren't clean tables to chart yet — want me to build those first, then we'll chart them?"_ Don't silently build on raw data. + +--- + +## The whole journey + +For the full arc (raw → dashboard), run the stages in order, handing off to each child in turn. Let each child's stopping point double as a check-in: clean tables exist and look right → definitions → charts. No heavy gate between stages (children handle their own), but in **Check with me on everything** mode confirm the user is happy before starting the next stage, and always finish with your end-of-journey recap.
+ +A user can drop in at any stage — that's the point of detecting state. Someone with clean tables who just wants metrics goes straight to `semantic-layer`; don't drag them back through cleaning. + +--- + +## Don't + +- **Hand the work to the child skill — don't do it yourself.** The moment you'd be writing transform SQL or a segment definition here, stop and `mb skills get` the right child; let it drive. You route and set up context; the child does the work. +- **Don't re-ask the autonomy question** once it's set; pass it down. +- **Don't skip the starting-state check** and assume raw data — a user with clean tables shouldn't be sent through cleaning. +- **Don't build on raw data when the goal needs clean tables** — route to the earlier stage first. +- **Don't drop the final recap** — you own the end-of-journey hard stop even though each child stops within its own stage. diff --git a/skill-data/semantic-layer/SKILL.md b/skill-data/semantic-layer/SKILL.md new file mode 100644 index 0000000..730fd58 --- /dev/null +++ b/skill-data/semantic-layer/SKILL.md @@ -0,0 +1,166 @@ +--- +name: semantic-layer +description: Turn clean, analysis-ready tables into a shared vocabulary the org reuses - Metabase segments (saved filters, e.g. active customers), measures (saved calculations, e.g. net revenue), and metrics (official numbers, e.g. monthly recurring revenue) - so people stop reinventing the same definition five ways. Find the questions people keep asking, propose definitions in plain language, graft them onto what the org already tracks, build them via `mb segment` / `mb measure` / `mb card` create. For a non-technical user who knows their domain. Load when someone wants to "make this reusable", "define X officially", "standardize how we calculate Y", or "create a segment / measure / metric". Strategy skill for designing reusable definitions; for raw `mb segment` / `mb measure` mechanics, use `core`. 
+allowed-tools: Read, Write, Edit, Bash, AskUserQuestion +--- + +# Semantic Layer + +> **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: audience is a non-technical user, so no database jargon (skip "normalize"/"grain"; ERD/foreign key are fine; explain "wide"/"long" the first time you use them). Ask before showing PII row-by-row (names, emails, phones) — default to aggregates. When asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL. Honor the autonomy mode the user picked. Full text and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. + +Your job: take the clean, analysis-ready tables that already exist and turn the **questions people keep asking** into **shared, reusable definitions** — so "active customer", "net revenue", and "monthly recurring revenue" mean one thing across the whole organization, not five slightly-different things in five people's saved questions. + +You build three kinds of reusable thing. These are real Metabase features with real names — **use the Metabase names** (segment, measure, metric) and teach them to the user as you go. They're product vocabulary, not jargon. Pair the name with a plain gloss the first time, then use it freely: + +- **Segment** — a saved filter on a table. A reusable row-selector: "Active customers", "orders over $100", "EU shipments". People pick it from the **Filter** block in the query builder instead of re-typing the conditions. (Docs: .) +- **Measure** — a saved aggregation on a table. A reusable calculation: "Net Promoter Score", "average order value". People pick it from the **Summarize** block instead of re-writing the formula. Only works on questions built directly on the measure's table. (Docs: .) 
+- **Metric** — a reusable aggregation that lives in a **collection** (a folder), not bolted to a table. "Monthly recurring revenue", "weekly active users". It's the org's official definition of an important number, can be saved into the **Library**, and can carry a default time dimension for charting. (Docs: .) + +Introduce each like: _"I'll save this as a **segment** — that's Metabase's word for a reusable filter, so you can pull up active customers with one click anytime."_ After that, just say "segment". + +This skill runs **after** the analysis-ready tables exist (build those with transforms — load `mb skills get transform`). Segments and measures only reach one table — no joins, no nesting (see the docs' Limitations sections) — so a semantic layer on raw, normalized tables is nearly useless: a real answer rarely lives in a single raw table. **Wide clean tables first, segments/measures/metrics second.** + +You drive everything through the `mb` CLI. Load the CLI skills you'll need: + +```bash +mb skills get core # auth, profiles, db/table/field inspection, query, search +mb skills get mbql # the definition bodies (filters and aggregations) are MBQL 5 +``` + +Authentication is the user's job. Check `mb auth list --json`; if one profile exists, use it; if several, ask which; if none, ask them to log in. Pass `--profile ` to every command. + +--- + +## Who you're talking to + +A **non-technical user who knows their domain well.** They know the business — who an "active" customer is, what counts as "revenue" — but not databases. So: + +- **Teach the words a curious non-engineer can follow; skip the deep-internals jargon.** Two sets are fine and worth teaching: Metabase product terms (**segment, measure, metric, collection, Library, the Filter / Summarize blocks**) and common data words a domain user can reasonably learn (**table, column, foreign key, schema, join, filter, row**) — gloss them once, then use them. 
Avoid **deep-internals jargon** that buys nothing for this user: grain, cardinality, normalize/denormalize, surrogate key, MBQL, `table_id`, materialize. Prefer the plain effect when it's clearer ("this number needs data from two tables" reads easier than "this needs a join across two fact tables") — but you don't have to contort around "foreign key" or "schema". +- **Talk about the question, then name the object.** Lead with what it does for them, then attach the term: _"I'll save 'big orders' as a segment so you can pull them up with one click."_ Not a bare "I'll create a segment on `table_id` 235." +- **Be a helpful colleague, not an engineer reporting status.** Elide the wiring (ids, query bodies, the CLI). Ask the one question that actually matters. + +--- + +## Autonomy — honor the mode the user set + +The user already picked an autonomy mode (the router's Shared Contract asks the slider once, up front — don't re-ask). Apply it to building definitions: + +| Mode | What you do | +| ----------------------- | ----------------------------------------------------------------------------------------------------------- | +| **Check on everything** | Confirm every single definition (name + plain description) before building it. | +| **Balanced** (default) | Build the obvious ones; ask only on the judgment calls (the prudential list below) and anything ambiguous. | +| **Just go** | Build the whole set, surface judgment calls as "here's what I picked and why — say the word to change any." | + +**Two things never bend, in any mode:** + +1. **When you're genuinely unsure — ask. Never assume.** "Just go" means _decide the obvious_, not _guess on the unclear_. A wrong-but-confident definition of "active customer" is worse than a one-line question. +2. **The final gate is a hard stop (see Phase 3).** No mode auto-publishes. You always stop, recap in plain language, and hand the user something to eyeball before anything goes live. 
+ +--- + +## Two kinds of decisions + +**Hard rules — absolutes, never ask:** + +1. **Never invent what a word means — pin it to real data.** "Active customer" is not yours to define. Before you build a segment for it, find out (from the user, or from how the data actually behaves) what _they_ mean: ordered in the last 90 days? Has a live subscription? Logged in this month? Confirm against actual values, then build to that. A definition built on a guessed meaning is a silent lie everyone then trusts. +2. **Keep the language at the level set in "Who you're talking to."** Metabase terms and common data words (table, column, foreign key, schema, join) are fine and worth teaching; deep-internals jargon (grain, cardinality, surrogate key, `table_id`) is not. +3. **Don't bury filters inside measures.** A measure should aggregate _what it's given_; let the user combine it with a segment at question time, rather than welding a filter into the measure. Welded-in filters collide and confuse when someone applies their own filter on top — and the metrics doc explicitly recommends against it. (Use conditional forms like `SumIf`/`CountIf` for "sum only the paid ones" — that's part of the measure's formula, not a hidden row filter.) +4. **Respect where each thing can reach.** Segments and measures work **only** on a question built _directly_ on their own table — not through a join, not on a question-built-on-a-question (the Limitations sections of both docs say so). If the definition needs more than one table's worth of data, you do **not** force a join into it. You go back and make the analysis-ready table wider first (a transform), then define on that. Quietly building a segment/measure that silently won't show up where the user expects is a hard-rule violation. +5. **Don't strand a metric on a single data source.** A metric is data-source-bound the same way — defined on table X, it appears only on questions built on table X, not on anything derived from it. 
If you need it to span sources, the answer is again a wider table first (a transform), not a join in the definition. +6. **Every definition keeps a clear, plain name and a one-line description in the user's words.** The name is what they'll see in a menu six weeks from now with no memory of this conversation. "Active customers (ordered in last 90 days)" beats "active_seg_v2". + +**Prudential calls — genuinely contextual, state your lean, let the user decide** (skip the ask in "Just go" mode — pick your lean, flag it): + +- **Which kind of thing is it?** Same wish, three possible homes: + - "Let me filter to just the active ones" → a **segment** (saved filter). + - "Let me add up revenue the same way everywhere, on this table" → a **measure** on the table. + - "Revenue is an _official company number_ people pull onto dashboards" → a **metric** in a collection, with a default month-by-month view so it charts cleanly. Lean: make it a metric when it's a headline figure the org reuses across many questions/dashboards; keep it a measure when it's a table-local convenience. +- **Where the metric lives.** Metrics sit in a collection (folder). Lean: put the org's blessed ones in the shared **Library** so they surface prominently; keep experimental ones in a working collection until trusted. +- **Default time dimension for a metric.** A monthly default makes it chart nicely on a dashboard, but doesn't lock anyone out of other groupings. Lean: set a sensible default (usually month) for anything headline; leave it off for raw counts that aren't inherently time-series. +- **How strict a segment is.** "Active" = last 30 vs 90 days is a real business call with no right answer from the data alone. Lean: surface the few reasonable thresholds with how many rows each catches, let the user pick. 
+ +Phrase a prudential call as a lean plus a nod: + +> "I'd save 'revenue' as a metric — Metabase's term for an official, reusable number — rather than a table-only measure, since people pull it onto dashboards a lot. Good?" + +--- + +## The process + +### Phase 0 — Understand what's reusable (quietly) + +Don't narrate. One "Let me see what's here and how people are already slicing it" is plenty. Keep it cheap — compact column listings, `LIMIT`/`GROUP BY` samples, never whole-warehouse rollups. + +1. **Confirm the analysis-ready tables exist.** List tables; find the wide, clean ones (a transform step's output). If the user is pointing you at raw normalized tables, say so plainly and suggest building the clean table first — don't build a hobbled semantic layer on raw data. +2. **Find the questions people keep asking.** Search existing saved questions and dashboards (`mb search`, `mb card list`) for repeated filters and repeated calculations — the same "status = active" written eleven times, five hand-rolled versions of revenue. Those repeats _are_ the semantic layer waiting to be named. This is the highest-signal input; mine it before proposing anything. +3. **Learn the real meanings.** For every candidate segment ("active", "churned", "high-value"), find what the words map to in actual values — distinct values of a status column, the spread of an amount column. Never define on a guessed meaning (hard rule 1). +4. **Graft onto what the org already tracks.** This is the part a model does worst and a human does best, so lean on the user: a new definition is far more useful when it lines up with the entities and language the organization _already_ uses. Before inventing "customer health score", ask whether there's already a notion of an active/at-risk customer in their world, and match it. Isolated definitions that don't connect to the existing model are low-value. Ask; don't infer the connection from column names. +5. 
**Check reach before promising.** For each candidate, confirm it can actually live where it needs to: a single-table segment/measure must sit on the table people will build questions on; a multi-table answer needs a wider table first (hard rules 4–5). Catch this now, not after building something that won't appear. + +### Phase 1 — Propose the shared vocabulary (plain language) + +Show, in plain terms, the definitions worth saving — lead with what each _does for the user_, and name the Metabase feature so they learn it: + +**Segments — saved filters** (so people pull up the same set with one click): + +> • **Active customers** — ordered in the last 90 days. ~2,400 of your 6,000 customers. +> • **Big orders** — over $100. About 1 in 5 orders. + +**Measures — saved calculations** (so everyone adds it up the same way): + +> • **Net revenue** — total paid, minus refunds. +> • **Average order value** — net revenue per order. + +**Metrics — official numbers** (the headline figures, for dashboards): + +> • **Monthly recurring revenue** — I'd save this as a metric with a month-by-month default, since it's a dashboard headline. Good? + +Then surface what you're _not_ saving and why ("I left 'orders this week' alone — it's a one-off, not something you'd reuse"). Ask your prudential questions — one at a time, lean-plus-nod. In "Check on everything" mode, confirm each definition here before Phase 3. In "Balanced", ask only the judgment calls. In "Just go", state your picks and move on. + +### Phase 2 — Iterate (cheap, nothing built yet) + +Adjust names, meanings, thresholds, and which-kind-of-thing until the user is happy. Re-confirm the final list in one short recap. If a definition turns out to need more than one table, say so plainly and point back to making the table wider — don't smuggle in a join. + +### Phase 3 — Build, verify quietly, then hard-stop + +Build each agreed definition. 
Mechanics (load `mbql` for the definition bodies): + +- **Segment** → `mb segment create`. Body: `name`, `table_id`, and a `definition` (a flat MBQL filter clause). Update later with `mb segment update <id>` — needs a `revision_message` (the audit note: _why_ it changed). Never delete-and-recreate. +- **Measure** → `mb measure create`. Body: `name`, `table_id`, and a `definition` holding **exactly one** aggregation. Same `revision_message` rule on update. +- **Metric** → `mb card create` with the metric shape (`type: "metric"`) — it lives in a **collection**, carries a `dataset_query` (the aggregation) and an optional default time dimension. Put org-blessed ones in the Library collection. + +Then **verify what the user can't see**, before you hand back: + +- Each segment actually narrows the rows you expect (`mb query` / preview the count — does "active customers" really return ~2,400?). +- Each measure and metric returns a sane number, not null or an error. +- Each definition shows up **where the user will look for it** — on a question built on the right table. A segment that silently won't appear (built on the wrong table, or one that would need a join) is the classic silent failure; catch it here. + +Then **stop. Hard gate — every mode, no exceptions.** Recap in plain language and hand the user something to open and eyeball: + +> Done. Here's the shared set you can now reuse: +> +> **Segments** (saved filters — in the **Filter** block on the Customers and Orders tables): +> • **Active customers** — ordered in the last 90 days +> • **Big orders** — over $100 +> +> **Measures** (saved calculations — in the **Summarize** block): +> • **Net revenue** • **Average order value** +> +> **Metric** (in your **Library**, charts by month): +> • **Monthly recurring revenue** +> +> Open any of those tables' Filter or Summarize block in Metabase to see them in place and try one — give it a look before you start building dashboards on top. + +End on that plain-language map.
It's what the user reads to trust the result — and it's what stops a wrong definition from quietly propagating into everything built next. + +--- + +## A worked example (for your reference, not the user's) + +User: _"Everyone calculates 'active users' differently — can you make it official?"_ + +- **Don't** create a segment from the phrase alone. **Find the real meaning first:** search existing questions — three people filter on "last seen in the last 30 days", two on "subscription status = active". That's the ambiguity to resolve. Ask: "I see two takes on 'active' — seen in the last 30 days, or has a live subscription. Which do you mean?" (hard rule 1). +- They say "live subscription, and seen in the last 30 days." **Check reach:** both pieces of info must live on the one table people build questions on. If subscription status and last-seen sit on two different tables, a single segment can't span them (hard rule 4) — to the user: "those two facts live in different places right now, so I'll widen your Customers table to carry both first, then save the filter on it." Build the transform, then the segment on the wide table. +- Build it as a segment on the wide table. **Verify** the row count is plausible. **Recap** plainly and stop: "Saved **Active users** — live subscription and seen in the last 30 days — as a segment on your Customers table; it's in the Filter block there. Have a look before you build on it." + +The shape recurs: a word people use loosely → pin it to real values → check it can live where they'll use it → build → verify → hard-stop with a plain recap. diff --git a/skill-data/transform/SKILL.md b/skill-data/transform/SKILL.md index 270686d..6a445c7 100644 --- a/skill-data/transform/SKILL.md +++ b/skill-data/transform/SKILL.md @@ -8,7 +8,7 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion A **transform** persists the result of a query (native SQL or MBQL) to a warehouse table the user can read from cards, dashboards, and other transforms. 
It runs on a schedule (via `transform-job`) or on-demand (`transform run`). -This skill covers the create-and-run flow. The general flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). +Flag conventions, body-input precedence, and output flags live in the `core` skill (`mb skills get core`). Deciding _which_ transforms to build — modeling a whole raw database into a set of clean, analysis-ready tables — is the `data-transformation` skill (`mb skills get data-transformation`). ## Body shape @@ -17,37 +17,32 @@ A transform has two halves: - `source` — the query to run (`type: "query"`, with `query.type` of `native` or `mbql`). - `target` — the warehouse destination (`type: "table"`, with `database`, `schema`, `name`). -Native SQL is the simplest source and the easiest to author by hand (see "Create + run" below). MBQL is what the Metabase UI emits and is more verbose; pull a sample with `mb transform get --full --json` if you need its shape. +Native SQL is the simplest source and the easiest to author by hand. MBQL is what the Metabase UI emits and is more verbose; pull a sample with `mb transform get --full --json` if you need its shape. For an **MBQL 5** `source.query` (`lib/type: "mbql/query"`), the body shape, the "options object is always second" clause rule, UUID minting, aggregation/order-by refs, naming aggregation output columns, and the `--print-schema` → `--dry-run` validation loop are all in the `mbql` skill — **`mb skills get mbql`**. The MBQL-5 pre-flight on `transform create`/`update` is documented there too (legacy MBQL 4 and native sources skip it). For a transform target, naming your aggregation output columns matters more than usual — a bare `count` / `avg_2` becomes the warehouse column name; see the `mbql` skill's "Naming aggregation output columns". 
## Create + run (native SQL) ```bash -cat > /tmp/transform.json <<'EOF' -{ - "name": "user_counts_by_signup_year", - "description": "Sample transform: counts users by year of signup", - "source": { - "type": "query", - "query": { - "type": "native", - "database": , - "native": { - "query": "SELECT date_trunc('year', created_at)::date AS signup_year, COUNT(*)::int AS user_count FROM public.users GROUP BY 1 ORDER BY 1" - } - } - }, - "target": { - "type": "table", - "database": , - "schema": "public", - "name": "user_counts_by_signup_year" - } -} -EOF - -TRANSFORM_ID=$(mb transform create --file /tmp/transform.json --profile --json | jq -r '.id') +# Author the SQL formatted — it's what `mb transform get` and the Metabase editor show. +cat > ./.scratch/user_counts_by_signup_year.sql <<'SQL' +SELECT + date_trunc('year', created_at)::date AS signup_year, + COUNT(*)::int AS user_count +FROM public.users +GROUP BY 1 +ORDER BY 1 +SQL + +# Embed it with jq --rawfile so the newlines survive as \n in valid JSON (don't hand-write the SQL as one line). +jq -n --rawfile q ./.scratch/user_counts_by_signup_year.sql \ + '{ name: "user_counts_by_signup_year", + description: "Sample transform: counts users by year of signup", + source: { type: "query", query: { type: "native", database: , native: { query: $q } } }, + target: { type: "table", database: , schema: "public", name: "user_counts_by_signup_year" } }' \ + > ./.scratch/transform.json + +TRANSFORM_ID=$(mb transform create --file ./.scratch/transform.json --profile --json | jq -r '.id') mb transform run "$TRANSFORM_ID" --wait --profile --json ``` @@ -57,8 +52,8 @@ Notes: - Target `schema` is the schema the result table is written into (e.g. `public`). - `--wait` on `transform run` polls until status is `succeeded` or `failed`. Without it you only get `{message: "Transform run started", run_id, final: null}` and have to poll yourself. 
- `--sync` implies `--wait`, then waits until the run's output table is registered — the run registers it itself, no `db sync-schema` needed — adding `target_table_id` to the envelope. Use it when you'll build MBQL on the output (see "Inspect"). -- The `--json` envelope is shape-stable: `{message, run_id, final}` (plus `target_table_id` under `--sync` — a number, or `null` if the table didn't register before the timeout). `final` is always present — `null` when `--wait` is omitted or the run never started, otherwise a full `TransformRun` object with `status` and `message`. On a failed run (`final.status` ∈ {`failed`, `timeout`, `canceled`}) the CLI exits 1 and writes a one-line summary `transform run failed` to stderr; the failure detail lives only in `final.message` on stdout, so `jq -r '.final.message'` is where to look. -- The heredoc with single-quoted `'EOF'` prevents shell from interpolating any `$vars` inside the SQL. +- The `--json` envelope is shape-stable: `{message, run_id, final}` (plus `target_table_id` under `--sync` — a number, or `null` if the table didn't register before the timeout). `final` is `null` when `--wait` is omitted or the run never started, otherwise a full `TransformRun` object with `status` and `message`. On a failed run (`final.status` ∈ {`failed`, `timeout`, `canceled`}) the CLI exits 1 and writes a one-line summary `transform run failed` to stderr; the failure detail lives only in `final.message` on stdout, so `jq -r '.final.message'` is where to look. +- **Keep the SQL formatted.** Author it multi-line in `./.scratch/<name>.sql` and embed with `jq --rawfile` (jq ≥1.6, which JSON-encodes the file so newlines become `\n`). The stored `native.query` is what `mb transform get` and the Metabase editor render — a single-line blob is valid JSON but unreadable when anyone opens the transform. Single-quote the heredoc delimiter (`<<'SQL'`) so the shell leaves `$vars` in the query alone (e.g. Postgres `$1`, `$$`).
- `transform create --json` returns the agent-facing compact projection: `{id, name, description, source_type, target: {type, database, schema, name}, target_db_id}`. Read `target.schema`/`target.name` directly off the create output — no follow-up `transform get` needed to verify where the transform will write. - If a transform with the same `name` already has a YAML representation on disk under the configured remote-sync repo, `create` mints a `_2` suffix on the exported filename (the new transform gets a fresh `entity_id`; the prior one isn't touched). For "iterate on the same concept" workflows, prefer `transform update ` — see "Iterating on a failing transform" below. - **`collection_id` only accepts a collection in the `:transforms` namespace.** Transforms aren't filed next to cards and dashboards — passing a normal analytics collection id (the kind a dashboard lives in) fails create/update with `collection_id: A Transform can only go in Collections in the :transforms namespace.` Omit `collection_id` to leave the transform uncollected (the common case), or create one with `mb collection create --body '{"name":"…"}' --namespace transforms --json` and pass the returned `id`. Cards and dashboards you build **on top of** the transform's output table go in ordinary collections as usual — so "put the transform and its dashboard in collection X" generally means _X holds the dashboard + cards; the transform stays in the transforms namespace._ @@ -70,14 +65,14 @@ mb transform list --profile --json mb transform get --profile --full --json # full transform incl. last run summary ``` -After a run the table physically exists in the warehouse, but Metabase addresses tables/columns by numeric id, so **MBQL and the UI can't reference a brand-new table until the instance syncs** (native SQL — a native `card` or `mb query` against `.` — reads it immediately). 
Run and register in one step with `--sync`: +After a run the table physically exists in the warehouse, but Metabase addresses tables/columns by numeric id, so **MBQL and the UI can't reference a brand-new table until the instance syncs** (native SQL — a native `card` or `mb query` against `.` — reads it immediately). Run and register in one step with `--sync`. ```bash TABLE_ID=$(mb transform run --sync --profile --json | jq -r '.target_table_id') mb table get "$TABLE_ID" --include fields --profile --json # field ids for MBQL ``` -`--sync` runs the transform and polls until its output table is registered, returning the id as `target_table_id` — the run registers the table itself, so no `db sync-schema` is needed. On `target_table_id: null` (still syncing when the poll timed out; exit 0) re-poll `mb transform get --full --json` until the `target_table_id` / `table` linkage lands. +On `target_table_id: null` (still syncing when the poll timed out; exit 0) re-poll `mb transform get --full --json` until the `target_table_id` / `table` linkage lands. Columns and types are inferred from the result set; change the SELECT shape and the next run fails on a column mismatch — drop the table first (`transform delete-table `). A changed shape also needs a re-run with `--sync` before MBQL sees the new/renamed columns. @@ -104,14 +99,14 @@ Notes: ## Update body: send only writable keys, never round-trip the GET body -`transform update ` is **PATCH semantics** — only send the fields you actually want to change. The endpoint accepts exactly these writable keys: +`transform update ` is **PATCH semantics** — only send the fields you want to change. 
The endpoint accepts exactly these writable keys: ``` name, description, source, target, run_trigger, tag_ids, collection_id, owner_user_id, owner_email ``` -**Don't paste the output of `transform get` into a `transform update` body.** The GET response carries server-side fields (`id`, `entity_id`, `created_at`, `updated_at`, `creator_id`, `last_run`, `target_db_id`, `target_table_id`, `source_type`, `source_database_id`, `source_readable`, `creator`, `owner`, `table`, …) that the PUT endpoint isn't built to handle. Currently, unknown top-level keys flow into `t2/update!` and produce a leaked H2 SQL error like: +**Don't paste the output of `transform get` into a `transform update` body.** The GET response carries server-side fields (`id`, `entity_id`, `created_at`, `updated_at`, `creator_id`, `last_run`, `target_db_id`, `target_table_id`, `source_type`, `source_database_id`, `source_readable`, `creator`, `owner`, `table`, …) that the PUT endpoint isn't built to handle. Unknown top-level keys flow into `t2/update!` and produce a leaked H2 SQL error like: ``` Column "TAGS" not found; SQL statement: @@ -130,13 +125,15 @@ Right shape — patch only what changes: # Rename only: mb transform update --body '{"name":"renamed"}' --profile --json -# Rewrite the SQL only: -cat > /tmp/patch.json <<'EOF' -{ "source": { "type": "query", "query": { "type": "native", - "database": , - "native": { "query": "SELECT … FROM public.orders" } } } } -EOF -mb transform update --file /tmp/patch.json --profile --json +# Rewrite the SQL only — author it formatted, embed with jq: +cat > ./.scratch/orders.sql <<'SQL' +SELECT … +FROM public.orders +SQL +jq -n --rawfile q ./.scratch/orders.sql \ + '{ source: { type: "query", query: { type: "native", database: , native: { query: $q } } } }' \ + > ./.scratch/patch.json +mb transform update --file ./.scratch/patch.json --profile --json # Change tag membership (note: tag_ids, not tags): mb transform update --body '{"tag_ids":[1,3]}' --profile --json 
@@ -148,12 +145,12 @@ If you really must round-trip, project to the writable subset: mb transform get --full --profile --json \ | jq '{name, description, source, target, run_trigger, tag_ids, collection_id, owner_user_id, owner_email} | with_entries(select(.value != null))' \ - > /tmp/patch.json + > ./.scratch/patch.json ``` ## Iterating on a failing transform -When `transform run` fails and you want to retry with a fixed body, **prefer `transform update --file body.json` over `transform delete ` + `transform create`.** Update keeps the same row, the same `entity_id`, the same materialized table, and the same on-disk YAML filename. Concretely this means: +When `transform run` fails and you want to retry with a fixed body, **prefer `transform update --file body.json` over `transform delete ` + `transform create`.** Update keeps the same row, the same `entity_id`, the same materialized table, and the same on-disk YAML filename: - `git-sync export` produces **one** clean commit containing only the fix, instead of "broken transform" + "remove broken transform" landing as two commits in `git log`. - You don't have to chase `_2` suffixes minted when two YAMLs share a `name` on disk (see the `transform create` notes above). @@ -163,17 +160,18 @@ Recipe: ```bash # 1. Try once -ID=$(mb transform create --file /tmp/t.json --profile --json | jq -r '.id') +ID=$(mb transform create --file ./.scratch/t.json --profile --json | jq -r '.id') mb transform run "$ID" --wait --profile --json # → failed # 2. Fix the body in place; PATCH only what changed. # Source-only patch — keeps name, target, tags untouched on the server. 
-cat > /tmp/source-patch.json <<'EOF' -{ "source": { "type": "query", "query": { "type": "native", - "database": , - "native": { "query": "" } } } } -EOF -mb transform update "$ID" --file /tmp/source-patch.json --profile --json +cat > ./.scratch/source.sql <<'SQL' + +SQL +jq -n --rawfile q ./.scratch/source.sql \ + '{ source: { type: "query", query: { type: "native", database: , native: { query: $q } } } }' \ + > ./.scratch/source-patch.json +mb transform update "$ID" --file ./.scratch/source-patch.json --profile --json # 3. Re-run mb transform run "$ID" --wait --profile --json # → succeeded diff --git a/skill-data/visualization/SKILL.md b/skill-data/visualization/SKILL.md index 6cb6fc8..6234e7f 100644 --- a/skill-data/visualization/SKILL.md +++ b/skill-data/visualization/SKILL.md @@ -6,6 +6,8 @@ allowed-tools: Read, Write, Edit, Bash, AskUserQuestion # Visualization: pick the chart, then set it +> **Shared contract (read first).** This skill is part of the `robot-data-engineer` family and follows its shared rules: audience is a non-technical user, so no database jargon (skip "normalize"/"grain"; ERD/foreign key are fine; explain "wide"/"long" the first time you use them). Ask before showing PII row-by-row (names, emails, phones) — default to aggregates. When asked for something the CLI can't do (alerts, dashboard filters), name the limit instead of erroring into raw SQL. Honor the autonomy mode the user picked. Full text and the autonomy slider live in the router — run `mb skills get robot-data-engineer` and read its **Shared Contract** if you haven't. + A card has two presentation fields alongside its `dataset_query`: - **`display`** — the chart type (`bar`, `line`, `pie`, `scalar`, `map`, `table`, …). One closed set; pick from the enum below. @@ -13,7 +15,7 @@ A card has two presentation fields alongside its `dataset_query`: Nothing validates `visualization_settings` — there is no pre-flight to fail past. 
A `display` typo or a misnamed key is accepted by the API; the card just renders as a default table or drops the setting. So **the feedback loop is read-back, not pre-flight**: after `card create`/`update`, confirm with `mb card get --full --json` (or open the card) that it rendered as intended. -General flag conventions and body-input precedence live in the `core` skill (`mb skills get core`); the `dataset_query` itself is the `mbql` skill's job (`mb skills get mbql`). This skill is only about how the result is displayed. +Flag conventions and body-input precedence live in the `core` skill (`mb skills get core`); the `dataset_query` itself is the `mbql` skill's job (`mb skills get mbql`). This skill is only about how the result is displayed. Two steps: **(1) pick the `display` that fits the data**, then **(2) bind the data columns and set options**. @@ -64,7 +66,7 @@ Closed `display` enum (card-level, non-hidden): `table`, `bar`, `line`, `area`, `graph.dimensions`, `graph.metrics`, `pie.dimension`, `pie.metric`, `scalar.field`, `funnel.metric`, `map.latitude_column`, `sankey.source`, … all take **output column-name strings** — the names the query _produces_, not field ids. A `count` aggregation outputs the column `count`; a breakout on a field outputs that field's name; a named aggregation outputs its `name`. These strings are **identical in the API form and the portable (git-sync) form** — no numeric-vs-name footgun here. -So the names you put in `visualization_settings` come from the query's output, not from `mb field`/`mb table`. If you set `name` on an aggregation (see the `mbql` skill), use that same string here. +The names come from the query's output, not from `mb field`/`mb table`. If you set `name` on an aggregation (see the `mbql` skill), use that same string here. 
## Minimum-viable settings per chart family (API form) @@ -136,7 +138,7 @@ For anything beyond a single dimension + metric — combo charts, conditional fo mb card get --full --json | jq '.visualization_settings' ``` -Paste that block into your `card create`/`update` body. The server produced it, so it's valid for that `display`. This beats guessing keys from memory, and it's token-cheap. +Paste that block into your `card create`/`update` body. The server produced it, so it's valid for that `display`. ## Full per-visualization key catalog diff --git a/skills/metabase-cli/SKILL.md b/skills/metabase-cli/SKILL.md index f463d41..eabc850 100644 --- a/skills/metabase-cli/SKILL.md +++ b/skills/metabase-cli/SKILL.md @@ -19,3 +19,9 @@ Before running any `mb` command, load the workflow content from the CLI: mb skills get core # auth, flag conventions, every command group mb skills list # everything available on the installed version ``` + +**Doing a whole job, not one command?** If the user wants an outcome — "make sense of my data", "build a data model", "go from raw data to a dashboard", "answer questions about my data", "be my data analyst", "set up analytics for X" — load the front-door router instead and let it drive: + +```bash +mb skills get robot-data-engineer +``` diff --git a/tests/e2e/card.e2e.test.ts b/tests/e2e/card.e2e.test.ts index fbfacd0..4ab591d 100644 --- a/tests/e2e/card.e2e.test.ts +++ b/tests/e2e/card.e2e.test.ts @@ -375,10 +375,14 @@ describe("card e2e", () => { }); // Pre-flight is bypassed; the server then rejects the malformed body with an HttpError (exit 1). - // The card-create endpoint surfaces the underlying app-DB constraint message via the response - // envelope; we assert a stable substring of that surfaced error. + // The surfaced message for the bad Database ID is version-dependent: v58-61 leak the app-DB + // constraint, while head validates at the query layer first. Accept either exact substring. 
expect(result.exitCode).toBe(1); - expect(cliErrorMessage(result.stderr)).toContain('NULL not allowed for column "DATABASE_ID"'); + const surfaced = cliErrorMessage(result.stderr); + const rejectedBadDatabaseId = + surfaced.includes('NULL not allowed for column "DATABASE_ID"') || + surfaced.includes("missing or invalid Database ID (:database)"); + expect(rejectedBadDatabaseId).toBe(true); expect(result.stdout).toBe(""); }); @@ -518,11 +522,21 @@ describe("card e2e", () => { env: authEnv(), }); - // PUT /api/card/:id accepts dataset_query as an opaque map and does not validate its inner - // shape, so the bad `database` does not trigger a 400. Bypass is proven by exit 0 — without - // --skip-validate the prior test shows pre-flight rejects with exit 2. - expect(result.exitCode, result.stderr).toBe(0); - expect(parseJson(result.stdout, CardCompact).id).toBe(SEEDED.ordersCardId); + // Bypass is proven by the absence of the CLI pre-flight: exit code is never 2 and the MBQL 5 + // validation message never fires (without --skip-validate the prior test shows exit 2). What the + // server then does with the bad `database` is its own authority and is version-dependent: v58-61 + // accept dataset_query as an opaque map (exit 0, card returned), while head validates the query + // layer and rejects it (exit 1, "missing or invalid Database ID"). 
+ expect(result.exitCode).not.toBe(2); + expect(result.stderr).not.toContain("card.dataset_query validation failed"); + if (result.exitCode === 0) { + expect(parseJson(result.stdout, CardCompact).id).toBe(SEEDED.ordersCardId); + } else { + expect(result.exitCode).toBe(1); + expect(cliErrorMessage(result.stderr)).toContain( + "missing or invalid Database ID (:database)", + ); + } }); it("create with dataset_query: {} is rejected at the CLI boundary (no H2 stack trace)", async () => { diff --git a/tests/e2e/skills.e2e.test.ts b/tests/e2e/skills.e2e.test.ts index 6d64981..13249cd 100644 --- a/tests/e2e/skills.e2e.test.ts +++ b/tests/e2e/skills.e2e.test.ts @@ -9,9 +9,13 @@ import { cleanupConfigHome, mkTempConfigHome, runCli } from "./run-cli"; const BUNDLED_VISIBLE_NAMES = [ "core", + "data-analysis", + "data-transformation", "document", "git-sync", "mbql", + "robot-data-engineer", + "semantic-layer", "transform", "visualization", ] as const; @@ -29,7 +33,7 @@ describe("skills e2e", () => { return dir; } - it("list returns the six bundled non-hidden skills, sorted by name", async () => { + it("list returns the ten bundled non-hidden skills, sorted by name", async () => { const result = await runCli({ args: ["skills", "list", "--json"], configHome: await makeIsolatedConfigHome(), @@ -120,7 +124,7 @@ describe("skills e2e", () => { expect(result.exitCode).toBe(2); expect(result.stderr).toContain( - "unknown skill name(s): does-not-exist (available: core, document, git-sync, mbql, transform, visualization)", + "unknown skill name(s): does-not-exist (available: core, data-analysis, data-transformation, document, git-sync, mbql, robot-data-engineer, semantic-layer, transform, visualization)", ); });