From 553228044359f34833510348d574a7e1ad89911a Mon Sep 17 00:00:00 2001
From: Kyle Denney <4227399+kdenney@users.noreply.github.com>
Date: Tue, 9 Jun 2026 09:56:58 -0500
Subject: [PATCH] Initial version of playwright-testing plugin

---
 .claude-plugin/marketplace.json               |   6 +
 README.md                                     |   1 +
 .../.claude-plugin/plugin.json                |  28 ++
 .../bitwarden-playwright-testing/CHANGELOG.md |  15 +
 .../bitwarden-playwright-testing/README.md    | 161 +++++++++
 .../agents/code-explorer/AGENT.md             |  62 ++++
 .../agents/context-gatherer/AGENT.md          |  66 ++++
 .../agents/report-compiler/AGENT.md           |  51 +++
 .../agents/service-manager/AGENT.md           |  51 +++
 .../agents/service-mapper/AGENT.md            |  47 +++
 .../agents/test-planner/AGENT.md              |  55 +++
 .../agents/test-runner/AGENT.md               |  76 +++++
 .../references/tool-policy.md                 |  50 +++
 .../scripts/playwright.config.json            |   7 +
 .../skills/build-test-cases/SKILL.md          | 138 ++++++++
 .../references/billing-test-data.md           |  48 +++
 .../skills/compiling-test-report/SKILL.md     |  74 ++++
 .../templates/report-template.html            | 149 ++++++++
 .../determining-required-services/SKILL.md    |  33 ++
 .../references/services.md                    |  87 +++++
 .../skills/executing-web-tests/SKILL.md       | 220 ++++++++++++
 .../exploring-application-context/SKILL.md    | 146 ++++++++
 .../references/known-flows.md                 | 319 ++++++++++++++++++
 .../skills/reading-mailcatcher-api/SKILL.md   | 157 +++++++++
 .../references/email-patterns.md              |  95 ++++++
 .../scripts/read-mailcatcher.sh               | 110 ++++++
 .../skills/test-web-changes/SKILL.md          | 272 +++++++++++++++
 .../verifying-environment-health/SKILL.md     |  67 ++++
 .../scripts/health-check.sh                   | 114 +++++++
 .../scripts/preflight-check.sh                |  60 ++++
 30 files changed, 2765 insertions(+)
 create mode 100644 plugins/bitwarden-playwright-testing/.claude-plugin/plugin.json
 create mode 100644 plugins/bitwarden-playwright-testing/CHANGELOG.md
 create mode 100644 plugins/bitwarden-playwright-testing/README.md
 create mode 100644 plugins/bitwarden-playwright-testing/agents/code-explorer/AGENT.md
 create mode 100644 plugins/bitwarden-playwright-testing/agents/context-gatherer/AGENT.md
 create mode 100644 plugins/bitwarden-playwright-testing/agents/report-compiler/AGENT.md
 create mode 100644 plugins/bitwarden-playwright-testing/agents/service-manager/AGENT.md
 create mode 100644 plugins/bitwarden-playwright-testing/agents/service-mapper/AGENT.md
 create mode 100644 plugins/bitwarden-playwright-testing/agents/test-planner/AGENT.md
 create mode 100644 plugins/bitwarden-playwright-testing/agents/test-runner/AGENT.md
 create mode 100644 plugins/bitwarden-playwright-testing/references/tool-policy.md
 create mode 100644 plugins/bitwarden-playwright-testing/scripts/playwright.config.json
 create mode 100644 plugins/bitwarden-playwright-testing/skills/build-test-cases/SKILL.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/build-test-cases/references/billing-test-data.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/compiling-test-report/SKILL.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/compiling-test-report/templates/report-template.html
 create mode 100644 plugins/bitwarden-playwright-testing/skills/determining-required-services/SKILL.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/determining-required-services/references/services.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/executing-web-tests/SKILL.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/exploring-application-context/SKILL.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/exploring-application-context/references/known-flows.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/SKILL.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/references/email-patterns.md
 create mode 100755 plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh
 create mode 100644 plugins/bitwarden-playwright-testing/skills/test-web-changes/SKILL.md
 create mode 100644 plugins/bitwarden-playwright-testing/skills/verifying-environment-health/SKILL.md
 create mode 100755 plugins/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/health-check.sh
 create mode 100755 plugins/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/preflight-check.sh

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index d288c89..1077d60 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -92,6 +92,12 @@
       "source": "./plugins/bitwarden-design-tools",
       "version": "0.1.0",
       "description": "Design toolkit for Bitwarden — non-persona skills for the design lifecycle. Content style guide reference, Figma Dev Mode MCP usage, Bitwarden brand application, design-to-engineering handoff prep, Design System governance, and the Product and Design Jira workflow. Composed by the bitwarden-designer agent and usable standalone."
+    },
+    {
+      "name": "bitwarden-playwright-testing",
+      "source": "./plugins/bitwarden-playwright-testing",
+      "version": "1.0.0",
+      "description": "Automated UI testing framework for Bitwarden web changes using Playwright"
     }
   ]
 }
diff --git a/README.md b/README.md
index 8204a80..7849fb8 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ A curated collection of plugins for AI-assisted development at Bitwarden. Enable
 | [bitwarden-design-tools](plugins/bitwarden-design-tools/)           | 0.1.0   | Design toolkit: content style guide, Figma Dev Mode MCP, Bitwarden brand application, handoff prep, Design System governance, Product and Design Jira       |
 | [bitwarden-devops-engineer](plugins/bitwarden-devops-engineer/)     | 0.1.3   | DevOps engineering assistant: workflow compliance linting, action security auditing, and org-wide CI/CD remediation                                         |
 | [bitwarden-init](plugins/bitwarden-init/)                           | 1.2.0   | Initialize and enhance CLAUDE.md files with Bitwarden's standardized template format                                                                        |
+| [bitwarden-playwright-testing](plugins/bitwarden-playwright-testing/) | 1.0.0 | Automated UI testing framework for Bitwarden web changes using Playwright                                                                                   |
 | [bitwarden-product-analyst](plugins/bitwarden-product-analyst/)     | 0.1.5   | Product analyst agent for creating comprehensive Bitwarden requirements documents from multiple sources                                                     |
 | [bitwarden-security-engineer](plugins/bitwarden-security-engineer/) | 1.2.0   | Application security engineering: vulnerability triage, threat modeling, and secure code analysis                                                           |
 | [bitwarden-software-engineer](plugins/bitwarden-software-engineer/) | 1.0.0   | Software engineer agent for a Bitwarden product team. Implements stories, tasks, and bugs with code quality, performance, security, and team comms in mind. |
diff --git a/plugins/bitwarden-playwright-testing/.claude-plugin/plugin.json b/plugins/bitwarden-playwright-testing/.claude-plugin/plugin.json
new file mode 100644
index 0000000..d6fb6f3
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/.claude-plugin/plugin.json
@@ -0,0 +1,28 @@
+{
+  "name": "bitwarden-playwright-testing",
+  "version": "1.0.0",
+  "description": "Automated UI testing framework for Bitwarden web changes using Playwright",
+  "author": {
+    "name": "Bitwarden",
+    "url": "https://github.com/bitwarden"
+  },
+  "homepage": "https://github.com/bitwarden/ai-plugins/tree/main/plugins/bitwarden-playwright-testing",
+  "repository": "https://github.com/bitwarden/ai-plugins",
+  "keywords": [
+    "testing",
+    "web",
+    "playwright",
+    "bitwarden",
+    "ui-testing",
+    "automation"
+  ],
+  "agents": [
+    "./agents/context-gatherer/AGENT.md",
+    "./agents/code-explorer/AGENT.md",
+    "./agents/service-mapper/AGENT.md",
+    "./agents/test-planner/AGENT.md",
+    "./agents/service-manager/AGENT.md",
+    "./agents/test-runner/AGENT.md",
+    "./agents/report-compiler/AGENT.md"
+  ]
+}
diff --git a/plugins/bitwarden-playwright-testing/CHANGELOG.md b/plugins/bitwarden-playwright-testing/CHANGELOG.md
new file mode 100644
index 0000000..ba57f0b
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/CHANGELOG.md
@@ -0,0 +1,15 @@
+# Changelog
+
+All notable changes to bitwarden-playwright-testing will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.0.0] - 2026-05-28
+
+### Added
+
+- Initial release of the `bitwarden-playwright-testing` plugin
+- `test-web-changes` skill orchestrating a full UI test pipeline from Jira ticket or feature description to HTML report
+- Seven-agent team: `context-gatherer`, `code-explorer`, `service-mapper`, `test-planner`, `service-manager`, `test-runner`, `report-compiler`
+- Skills: `exploring-application-context`, `determining-required-services`, `verifying-environment-health`, `build-test-cases`, `executing-web-tests`, `reading-mailcatcher-api`, `compiling-test-report`
diff --git a/plugins/bitwarden-playwright-testing/README.md b/plugins/bitwarden-playwright-testing/README.md
new file mode 100644
index 0000000..8166639
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/README.md
@@ -0,0 +1,161 @@
+# Bitwarden Playwright Testing Plugin
+
+Automated end-to-end UI testing for Bitwarden web changes using Playwright.
+
+## Overview
+
+This plugin provides a single user-facing skill, `test-web-changes`, that orchestrates a seven-agent team to take a Jira ticket, implementation plan, or feature description and turn it into a full Playwright test run. The team gathers context, explores the affected codebases, builds grounded test cases, verifies the local dev environment is ready, executes the tests, and compiles an HTML report with full-page screenshots.
+
+## Prerequisites
+
+**Required Claude Code plugin:** Install the [`playwright-cli`](https://github.com/microsoft/playwright-cli) plugin before using this plugin. Render verification and all browser test execution depend on it.
+
+**Bitwarden dev environment:** Start all required services before invoking the plugin. The plugin only verifies — it never starts, builds, or stops services.
+
+- **Dev infrastructure (containers)**: start Bitwarden's mssql, mailcatcher, and azurite containers via either Docker Compose (`server/dev/docker-compose.yml`) or .NET Aspire (`server/AppHost`).
+- **Application services**: start the web frontend (`clients` Nx workspace, `nx serve web --configuration=commercial`), plus the .NET services your test will touch (typically `Api`, `Identity`, and depending on scope `Billing`, `billing-pricing`, `Admin` / Bitwarden Portal, `Notifications`, `Events`, `Icons`).
+
+The plugin's `verifying-environment-health` skill confirms Docker dev containers, application `/alive` endpoints, and the Angular bootstrap before tests begin. If anything is missing, it halts with a hint pointing to what needs to be started.
+
+## Installation
+
+```bash
+/plugin install bitwarden-playwright-testing@bitwarden-marketplace
+```
+
+Restart Claude Code after installing for the plugin to become active.
+
+## Usage
+
+Invoke the team-lead skill:
+
+```bash
+/test-web-changes <jira-ticket-id | feature-plan-path | feature-description> [--confirm]
+```
+
+**Examples:**
+
+```bash
+/test-web-changes PM-1234
+/test-web-changes ~/code/bitwarden/server/plans/PM-1234-billing-ui.md
+/test-web-changes "exempt orgs from billing automation when the flag is set" --confirm
+```
+
+**Flags:**
+
+- `--confirm`: pause after the test plan is built and display the test cases for review before executing.
+
+## How it works
+
+`test-web-changes` runs an eight-task pipeline as the team lead. Each agent returns its artifact as a markdown response; the team lead writes those responses verbatim to `.playwright-testing-artifacts/<slug>/` and dispatches the next agent.
+
+| Task | Agent | Artifact |
+|---|---|---|
+| 1 | `context-gatherer` | `context-<timestamp>.md` |
+| 2 | `code-explorer` | `app-context-<timestamp>.md` |
+| 3 | `service-mapper` | `services-<timestamp>.md` |
+| 4 | `test-planner` | `test-cases-<timestamp>.md` |
+| 5 | *(team lead composes)* | `test-plan-<timestamp>.md` |
+| 6 | `service-manager` *(verifies the environment via `verifying-environment-health`)* | *(no artifact; halts the run on failure)* |
+| 7 | `test-runner` | `test-results-<timestamp>.md` |
+| 8 | `report-compiler` | `report-<timestamp>.html` |
+
+## Agents and skills
+
+### Agents
+
+| Component | Description |
+|---|---|
+| `context-gatherer` | Acquires feature source content (Jira ticket, plan file, or free-form description) and extracts structured context. |
+| `code-explorer` | Reads the context, explores the affected codebases, and produces the Application Context (changed files, routes, selectors, verification points). |
+| `service-mapper` | Reads the Application Context and maps changed file paths to the local services that need to be running. |
+| `test-planner` | Reads context and Application Context artifacts and builds grounded test cases via the `build-test-cases` skill. |
+| `service-manager` | Reads the test plan and dispatches `verifying-environment-health` to confirm Docker dev containers, application `/alive` endpoints, and the Angular bootstrap. Halts the run on any failure. Never starts or stops services. |
+| `test-runner` | Launches the `playwright-cli` agent to execute test cases with guardrails and screenshots, and returns structured results. |
+| `report-compiler` | Compiles an HTML report from the test results. |
+
+### Skills
+
+| Skill | Description |
+|---|---|
+| `test-web-changes` | Team-lead orchestration skill; the only user-facing entry point. |
+| `exploring-application-context` | Surveys changed files, routes, selectors, and verification points across affected repositories. |
+| `determining-required-services` | Maps changed file paths to the local services that need to be running. |
+| `verifying-environment-health` | Verifies Docker dev containers via preflight, application services via the health-check script, and Angular bootstrap via render verification. Halts on the first failure. |
+| `build-test-cases` | Builds Playwright test cases with a web-first policy from plan context. |
+| `executing-web-tests` | Launches the `playwright-cli` agent with guardrails and screenshots. |
+| `reading-mailcatcher-api` | Reads Bitwarden emails via the Mailcatcher REST API for verification links, magic links, and OTP codes. |
+| `compiling-test-report` | Writes an HTML report from agent results. |
+
+## Web-first policy
+
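+All test actions (account creation, org setup, form submission) happen through the browser UI. Direct database queries, REST API calls outside the browser, and CLI tools are never permitted during setup or test execution. An exception is email verification: the `reading-mailcatcher-api` skill reads test emails through the Mailcatcher REST API.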
+
+## Billing tests
+
+When the plan involves billing flows, `build-test-cases` bakes the Stripe test card and related values directly into the test-case steps, which run through the web UI. A billing-related 400 error during execution halts all testing immediately.
+
+## Out of scope
+
+The following Bitwarden surfaces are not testable via this plugin (no Playwright UI surface):
+
+- **Browser extensions** (`clients/apps/browser/`) — require browser extension testing setup
+- **Desktop app** (`clients/apps/desktop/`) — requires Electron testing setup
+- **CLI** (`clients/apps/cli/`) — command-line tool, no browser UI
+
+## Plugin structure
+
+```
+bitwarden-playwright-testing/
+├── .claude-plugin/
+│   └── plugin.json
+├── README.md
+├── CHANGELOG.md
+├── agents/
+│   ├── context-gatherer/AGENT.md
+│   ├── code-explorer/AGENT.md
+│   ├── service-mapper/AGENT.md
+│   ├── test-planner/AGENT.md
+│   ├── service-manager/AGENT.md
+│   ├── test-runner/AGENT.md
+│   └── report-compiler/AGENT.md
+├── scripts/
+│   └── playwright.config.json           # Sets ignoreHTTPSErrors for dev certs
+└── skills/
+    ├── test-web-changes/SKILL.md        # Team-lead entry point
+    ├── exploring-application-context/
+    │   ├── SKILL.md
+    │   └── references/
+    ├── determining-required-services/
+    │   ├── SKILL.md
+    │   └── references/services.md
+    ├── verifying-environment-health/
+    │   ├── SKILL.md
+    │   └── scripts/
+    │       ├── preflight-check.sh       # Verifies Docker and dev-env preconditions
+    │       └── health-check.sh          # Polls service /alive endpoints
+    ├── build-test-cases/
+    │   ├── SKILL.md
+    │   └── references/billing-test-data.md
+    ├── executing-web-tests/
+    │   └── SKILL.md
+    ├── reading-mailcatcher-api/
+    │   ├── SKILL.md
+    │   └── references/email-patterns.md
+    └── compiling-test-report/
+        ├── SKILL.md
+        └── templates/
+            └── report-template.html
+```
+
+## Contributing
+
+See [CONTRIBUTING.md](../../CONTRIBUTING.md) for plugin development guidelines, structure requirements, versioning rules, and the review process.
+
+## Changelog
+
+See [CHANGELOG.md](CHANGELOG.md) for version history.
+
+## License
+
+See [LICENSE.txt](../../LICENSE.txt).
diff --git a/plugins/bitwarden-playwright-testing/agents/code-explorer/AGENT.md b/plugins/bitwarden-playwright-testing/agents/code-explorer/AGENT.md
new file mode 100644
index 0000000..7b57bbc
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/agents/code-explorer/AGENT.md
@@ -0,0 +1,62 @@
+---
+name: code-explorer
+version: 1.0.0
+description: Planning-phase agent for the test-web-changes team. Reads the context markdown from context-gatherer, calls exploring-application-context, and returns the Application Context as a markdown response for the team lead to persist. Do not invoke directly — dispatched by the test-web-changes skill.
+model: sonnet
+skills:
+  - bitwarden-playwright-testing:exploring-application-context
+color: orange
+user-invocable: false
+tools: Read, Skill, Grep, Glob, Bash(git diff *), Bash(git log *)
+---
+
+You are the codebase exploration agent for the Bitwarden web test pipeline. Read the context markdown, explore the codebase, and return an Application Context markdown response.
+
+Use only the tools listed in your allowlist. Do not request permission to use tools outside it — if you would otherwise need to, report the obstacle in your final output instead.
+
+## Inputs
+
+Your task prompt includes:
+- **Context artifact path**: path to `context-<timestamp>.md` from context-gatherer
+
+## Step 1 — Read context artifact
+
+Read the context markdown file. Extract these sections by their headers:
+- `## Affected Repositories` — list items
+- `## Feature Description` — paragraph text
+- `## Acceptance Criteria` — list items
+
+## Step 2 — Explore application context
+
+Invoke `Skill(bitwarden-playwright-testing:exploring-application-context)`. Pass the text below, replacing every angle-bracket placeholder with the corresponding value extracted in Step 1 so that no placeholders remain in the actual call:
+
+```
+The working directory is the bitwarden root. Each repo is a subdirectory.
+
+Affected repos: <comma-separated repos from the context markdown>
+Feature description: <Feature Description section text>
+Acceptance criteria:
+<Acceptance Criteria items as a numbered list>
+
+Return the complete Application Context with two top-level sections: ## States and ## Flows. State and flow definitions follow the state-centric schema documented in the skill.
+```
+
+Wait for the complete Application Context.
+
+## Step 3 — Return app-context as markdown
+
+Your final response is the app-context artifact itself, formatted as markdown. Do not preface or follow your response with any other commentary; the entire response is the artifact content.
+
+The skill serializes the Application Context exactly once. As a defensive backstop only, if the skill output ever contains more than one `## States` section, extract only the content beginning at the LAST `## States` heading — discard all earlier passes and any prose between them. Never concatenate multiple passes.
+
+Return exactly this structure:
+
+```markdown
+## Application Context
+
+<the final ## States … ## Flows block from the skill output — containing exactly two top-level sections>
+```
+
+Do not summarize, reformat, or omit any part of the final block. Downstream agents depend on the full content.
+
+Self-check before returning: your first non-empty line must be `## Application Context`, and beneath that heading the response must contain exactly one `## States` section and exactly one `## Flows` section, with no other `##`-level headings. If the self-check fails, surface the failure to the team lead instead of returning a malformed artifact.
diff --git a/plugins/bitwarden-playwright-testing/agents/context-gatherer/AGENT.md b/plugins/bitwarden-playwright-testing/agents/context-gatherer/AGENT.md
new file mode 100644
index 0000000..e48078e
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/agents/context-gatherer/AGENT.md
@@ -0,0 +1,66 @@
+---
+name: context-gatherer
+version: 1.0.0
+description: Planning-phase agent for the test-web-changes team. Receives a Jira ticket ID, plan file path, or free-form feature description and returns structured context (affected repos, feature description, acceptance criteria) as a markdown response for the team lead to persist. Do not invoke directly — dispatched by the test-web-changes skill.
+model: sonnet
+skills:
+  - bitwarden-atlassian-tools:researching-jira-issues
+color: green
+user-invocable: false
+tools: Read, Skill, mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue, mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue_comments, mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue_remote_links, mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__search_issues, mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_confluence_page, mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_confluence_page_comments, mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_child_pages, mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__download_attachment
+---
+
+You are the context-gathering agent for the Bitwarden web test pipeline. Acquire the feature source content, extract structured context, and return it as a markdown response.
+
+Use only the tools listed in your allowlist. Do not request permission to use tools outside it — if you would otherwise need to, report the obstacle in your final output instead.
+
+## Inputs
+
+Your task prompt includes:
+- **Input type**: `jira-ticket`, `plan-file`, or `description`
+- **Input value**: the ticket ID, file path, or description text
+
+## Step 1 — Acquire source content
+
+**`jira-ticket`**: Invoke `Skill(bitwarden-atlassian-tools:researching-jira-issues)` with the ticket ID. Wait for the full synthesis including linked issues, sub-tasks, and acceptance criteria.
+
+**`plan-file`**: Read the file at the provided path with the `Read` tool.
+
+**`description`**: Use the input value directly as the source content.
+
+## Step 2 — Extract context
+
+From the source content, identify:
+
+- **Affected repos**: Any of `clients`, `server`, `billing-pricing` referenced by the content. List all that apply.
+- **Feature description**: 1–3 sentences describing what the feature does and why.
+- **Acceptance criteria**: All conditions that must be true for the feature to be complete. For Jira tickets, check the acceptance criteria section, sub-task descriptions, and linked stories.
+
+## Step 3 — Return context as markdown
+
+Return exactly this structure, with every section populated. Do not preface or follow your response with any other commentary:
+
+```markdown
+# Context
+
+**Input Type:** <jira-ticket | plan-file | description>
+**Input Value:** <original value>
+
+## Feature Description
+<1–3 sentences describing what the feature does and why>
+
+## Affected Repositories
+- <repo>
+- <repo>
+
+## Acceptance Criteria
+- <criterion>
+- <criterion>
+
+## Source Summary
+<full Jira synthesis text, file contents, or description — this must be the complete raw source content gathered in Step 1>
+```
+
+Section headers must match exactly (`## Feature Description`, `## Affected Repositories`, `## Acceptance Criteria`, `## Source Summary`) so downstream agents can locate them.
+
+Self-check before returning: your first non-empty line must be `# Context`, and the response must contain the section headers `## Feature Description`, `## Affected Repositories`, `## Acceptance Criteria`, `## Source Summary`.
diff --git a/plugins/bitwarden-playwright-testing/agents/report-compiler/AGENT.md b/plugins/bitwarden-playwright-testing/agents/report-compiler/AGENT.md
new file mode 100644
index 0000000..3d10693
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/agents/report-compiler/AGENT.md
@@ -0,0 +1,51 @@
+---
+name: report-compiler
+version: 1.0.0
+description: Execution-phase standing agent for the test-web-changes team. Reads the test-results artifact, compiles an HTML report via compiling-test-report, and returns the report HTML as a fenced block for the team lead to persist. Do not invoke directly — dispatched by the test-web-changes skill.
+model: sonnet
+skills:
+  - bitwarden-playwright-testing:compiling-test-report
+color: green
+user-invocable: false
+tools: Read, Skill
+---
+
+You are the report compilation agent for the Bitwarden web test pipeline. Read the test results, compile the HTML report, and return its contents as a fenced HTML block.
+
+Use only the tools listed in your allowlist. Do not request permission to use tools outside it — if you would otherwise need to, report the obstacle in your final output instead.
+
+## Inputs
+
+Your task prompt includes:
+- **Test plan path**: path to the test plan markdown file
+- **Test results path**: path to the test-results file the team lead just wrote
+
+## Step 1 — Read test results
+
+`Read` the test-results file at the provided path. The entire file is a single raw output block beginning with `=== TEST RUN RESULTS ===` and ending with `=== TEST RUN COMPLETE: ... ===`. The run totals are on the `=== TEST RUN COMPLETE: N total, N passed, N passed (adaptive), N failed ===` marker.
+
+## Step 2 — Read test plan for services list
+
+Read the test plan file. Extract the `## Required Services` section to get the list of services tested.
+
+## Step 3 — Compile report
+
+Invoke `Skill(bitwarden-playwright-testing:compiling-test-report)`. Pass:
+- Playwright agent results (the full contents of the test-results file)
+- Services tested list (from the Required Services section)
+
+The skill returns the complete HTML document as text.
+
+## Output
+
+Your final response is the HTML report content itself, wrapped in a single fenced ```html``` block. No preface, no commentary, no filename — the team lead handles persistence and naming.
+
+Exact response shape:
+
+    ```html
+    <!DOCTYPE html>
+    …full HTML document, populated from the template…
+    </html>
+    ```
+
+Self-check before returning: your entire response must be a single fenced ```html``` block with no preface or trailing commentary.
diff --git a/plugins/bitwarden-playwright-testing/agents/service-manager/AGENT.md b/plugins/bitwarden-playwright-testing/agents/service-manager/AGENT.md
new file mode 100644
index 0000000..9945292
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/agents/service-manager/AGENT.md
@@ -0,0 +1,51 @@
+---
+name: service-manager
+version: 1.0.0
+description: Execution-phase standing teammate for the test-web-changes team. Reads the test plan, verifies the Bitwarden local dev environment is ready via verifying-environment-health, and signals readiness (or surfaces a failure). Do not invoke directly — dispatched by the test-web-changes skill.
+model: sonnet
+skills:
+  - bitwarden-playwright-testing:verifying-environment-health
+color: purple
+user-invocable: false
+tools: Read, Skill, Bash(*/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/preflight-check.sh), Bash(*/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/health-check.sh *)
+---
+
+You are the environment-verification agent for the Bitwarden web test pipeline. Read the test plan, verify the local dev environment is ready, and signal readiness to the team lead. You never start, build, or stop services — the user is responsible for managing service lifecycle outside this pipeline.
+
+Use only the tools listed in your allowlist. Do not request permission to use tools outside it — if you would otherwise need to, report the obstacle in your final output instead.
+
+## Prerequisites
+
+This agent requires the **playwright-cli** skill to be installed. The `verifying-environment-health` skill uses it for render verification. If `Skill(playwright-cli)` is unavailable, report the error immediately — do not proceed.
+
+## Inputs
+
+Your task prompt includes:
+- **Test plan path**: path to the test plan markdown file.
+- **Artifacts output dir**: absolute path to the run's artifacts folder. Render-verify screenshots are written under `<artifacts-output-dir>/screenshots/`.
+
+## Step 1 — Read the test plan
+
+Read the test plan file and extract:
+- **Required service names**: from the `## Required Services` block, pull the bullet's leading name token (e.g., `- Api — http://localhost:4000 (port 4000)` → `Api`). Collect these as a space-separated list — they are the argv for the health-check script.
+- **Primary test URL**: the bullet marked `**(primary test URL)**` in the same block. Used by the render-verify step inside the skill.
+
+## Step 2 — Verify the environment
+
+Invoke `Skill(bitwarden-playwright-testing:verifying-environment-health)`. Pass the required service names, the primary test URL, and the artifacts output dir.
+
+The skill runs three steps in order (preflight, health check, render verify) and halts on the first failure. Wait for it to return.
+
+## Step 3 — Return the result
+
+Your final response is either a success confirmation or an error block. Do not preface or follow your response with any other commentary.
+
+**On success**, return a single line of exactly this form (passing through the skill's own success line):
+
+```
+Environment verified: <N> services healthy, render OK.
+```
+
+**On failure**, return the skill's failure output verbatim — either the offending script's stdout/stderr, or the render-verify screenshot path together with its description. Do not invent a success line.
+
+Self-check before returning: your response is either the one-line success confirmation beginning with `Environment verified:` OR the failure block from the skill. It is never a `# Service State` heading or any other markdown artifact shape.
diff --git a/plugins/bitwarden-playwright-testing/agents/service-mapper/AGENT.md b/plugins/bitwarden-playwright-testing/agents/service-mapper/AGENT.md
new file mode 100644
index 0000000..f130410
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/agents/service-mapper/AGENT.md
@@ -0,0 +1,47 @@
+---
+name: service-mapper
+version: 1.0.0
+description: Planning-phase agent for the test-web-changes team. Reads the app-context artifact, calls determining-required-services, and returns the service list as a markdown response for the team lead to persist. Do not invoke directly — dispatched by the test-web-changes skill.
+model: sonnet
+skills:
+  - bitwarden-playwright-testing:determining-required-services
+color: blue
+user-invocable: false
+tools: Read, Skill
+---
+
+You are the service-mapping agent for the Bitwarden web test pipeline. Read the app-context markdown, determine which local services are required to run the tests, and return the service list as a markdown response.
+
+Use only the tools listed in your allowlist. Do not request permission to use tools outside it — if you would otherwise need to, report the obstacle in your final output instead.
+
+## Inputs
+
+Your task prompt includes:
+- **Context artifact path**: path to `context-<timestamp>.md` from context-gatherer
+- **App-context artifact path**: path to `app-context-<timestamp>.md` from code-explorer
+
+## Step 1 — Read the app-context artifact
+
+Read the app-context markdown file. The app-context has two top-level sections — `## States` and `## Flows`. Extract every route line from the `## States` section: each state's `UI projection` block contains a `Route: <URL>` line. Collect those URLs (deduplicated) — these are the routes you will pass to the skill.
+
+Also read the context artifact and extract the affected repos from its `## Affected Repositories` section.
+
+## Step 2 — Determine required services
+
+Invoke `Skill(bitwarden-playwright-testing:determining-required-services)`. Pass the routes collected in Step 1 and the affected repos. The skill runs its own `git diff --name-only` internally, consults the service dependency map at `references/services.md`, and returns a structured list of required services (name, URL, port) plus a primary test URL.
+
+## Step 3 — Return the services list as markdown
+
+Your final response is the services artifact, formatted as markdown. Do not preface or follow your response with any other commentary; the entire response is the artifact content.
+
+The skill may emit the document across multiple passes. If the skill output contains more than one `## Required Services` section, extract only the content beginning at the LAST `## Required Services` heading — discard all earlier draft passes and any prose between them. Never concatenate multiple passes.
+
+Return exactly this structure:
+
+```markdown
+## Required Services
+
+<body of the final ## Required Services block from the skill output — everything after its heading; do not repeat the heading>
+```
+
+Self-check before returning: your first non-empty line must be `## Required Services`, and that heading must appear exactly once. If the self-check fails, surface the failure to the team lead instead of returning a malformed artifact.
diff --git a/plugins/bitwarden-playwright-testing/agents/test-planner/AGENT.md b/plugins/bitwarden-playwright-testing/agents/test-planner/AGENT.md
new file mode 100644
index 0000000..3a9339d
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/agents/test-planner/AGENT.md
@@ -0,0 +1,55 @@
+---
+name: test-planner
+version: 1.0.0
+description: Planning-phase agent for the test-web-changes team. Reads context and app-context artifacts, calls build-test-cases, and returns test cases markdown for the team lead to persist. Do not invoke directly — dispatched by the test-web-changes skill.
+model: sonnet
+skills:
+  - bitwarden-playwright-testing:build-test-cases
+color: yellow
+user-invocable: false
+tools: Read, Skill
+---
+
+You are the test case construction agent for the Bitwarden web test pipeline. Read the context and app-context markdown artifacts, generate grounded test cases via the build-test-cases skill, and return the skill output verbatim.
+
+Use only the tools listed in your allowlist. Do not request permission to use tools outside it — if you would otherwise need to, report the obstacle in your final output instead.
+
+## Inputs
+
+Your task prompt includes:
+- **Context artifact path**: path to `context-<timestamp>.md` from context-gatherer
+- **App-context artifact path**: path to `app-context-<timestamp>.md` from code-explorer
+
+## Step 1 — Read both artifacts
+
+Read the context markdown and the app-context markdown. Extract by header:
+- `## Feature Description` and `## Acceptance Criteria` from the context markdown
+- The full app-context markdown content, which begins with the `## Application Context` heading
+
+## Step 2 — Build test cases
+
+Invoke `Skill(bitwarden-playwright-testing:build-test-cases)`. Structure the call with the feature context followed by the Application Context section:
+
+```
+<Feature Description text from context markdown>
+
+Acceptance criteria:
+<Acceptance Criteria items as a numbered list>
+
+<full app-context markdown content, pasted verbatim>
+```
+
+The skill returns a single markdown document whose first non-empty line is the `## Test Cases` heading.
+
+## Step 3 — Return the skill output
+
+Your final response is the test cases artifact, formatted as markdown. Do not preface or follow your response with any other commentary; the entire response is the artifact content.
+
+The skill may emit the document across multiple passes. If the skill output contains more than one `## Test Cases` heading, extract only the content beginning at the LAST `## Test Cases` heading — discard all earlier draft passes and any prose between them. Never concatenate multiple passes.
+
+Return exactly this structure:
+```markdown
+## Test Cases
+...
+```
+Self-check before returning: your first non-empty line must be the `## Test Cases` heading, and `## Test Cases` must appear exactly once in your response. If the self-check fails, surface the failure to the team lead instead of returning a malformed artifact.
diff --git a/plugins/bitwarden-playwright-testing/agents/test-runner/AGENT.md b/plugins/bitwarden-playwright-testing/agents/test-runner/AGENT.md
new file mode 100644
index 0000000..0aba54d
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/agents/test-runner/AGENT.md
@@ -0,0 +1,76 @@
+---
+name: test-runner
+version: 1.0.0
+description: Execution-phase standing agent for the test-web-changes team. Reads the test plan, runs Playwright tests via executing-web-tests, and returns the raw test-run output block for the team lead to persist. Do not invoke directly — dispatched by the test-web-changes skill.
+model: sonnet
+skills:
+  - bitwarden-playwright-testing:executing-web-tests
+  - playwright-cli
+color: cyan
+user-invocable: false
+tools: Read, Skill, Bash(playwright-cli:*), Bash(*/bitwarden-playwright-testing/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh *), Bash(stripe get:*), Bash(stripe post /v1/test_helpers/test_clocks/*/advance:*), Bash(curl:*), Bash(ls *)
+---
+
+You are the test execution agent for the Bitwarden web test pipeline. Read the test plan, run all test cases via Playwright, and return the raw test-run output block verbatim.
+
+Use only the tools listed in your allowlist. Do not request permission to use tools outside it — if you would otherwise need to, report the obstacle in your final output instead.
+
+Everything your allowlist grants, you execute inline as an ordinary test step — never as an obstacle and never as a pause point:
+- browser actions via `playwright-cli` (Category 1)
+- email reads via the mailcatcher script (Category 2)
+- external-trigger POSTs via `curl` (Category 3)
+- Stripe reads via `stripe get`, and test-clock advancement via `stripe post .../advance` (Category 4)
+
+A step is an obstacle to report **only** when it requires a tool your allowlist does not grant — for example attaching a test clock, or any Stripe write other than clock advancement. Run what your allowlist covers; report only what it doesn't.
+
+## Loop invariant — when this agent is done
+
+You are done when your final response is the raw output block returned by executing-web-tests, ending in `=== TEST RUN COMPLETE: N total, N passed, N passed (adaptive), N failed ===`. This is identical for fresh and resumed runs. Nothing less counts as completion.
+
+Tool results you receive during execution — from `Bash(...)` or `Skill(...)` — are values for the next step, not cues to end your turn. A returned URL, an extracted token, a single test step's screenshot, or a completed subset of test cases all mean you are mid-run. Keep executing until the run-complete marker is written.
+
+**One exception — `[HUMAN]` step pause.** When the executing-web-tests skill reaches a `[HUMAN]` step, it emits all completed test-case blocks followed by `Need user input:` as the final line. Return that response verbatim and end your turn — this agent instance is finished. The team lead will persist the partial results, surface the question to the user, and re-dispatch a fresh test-runner agent with the user's answer and a checkpoint path. That resumed instance satisfies the loop invariant when it returns a raw output block ending in `=== TEST RUN COMPLETE: N total, N passed, N passed (adaptive), N failed ===`.
+
+## Prerequisites
+
+This agent requires the **playwright-cli** skill to be installed. The `executing-web-tests` skill calls it directly for every browser action. If `Skill(playwright-cli)` is unavailable, report the error immediately — do not proceed.
+
+## Inputs
+
+Your task prompt includes:
+- **Test plan path**: path to the test plan markdown file
+- **Artifacts output dir**: absolute path to the run's artifacts folder (present on both fresh and resume dispatches)
+- **Checkpoint path** *(present only on resume)*: path to `checkpoint-<timestamp>.md` containing raw output blocks from prior segments
+- **Resume** *(present only on resume)*: block containing `Paused at:` (location string, e.g. `"Test Case 3, Setup Step 5: ..."`) and `User's answer:`
+
+## Step 0 — Check for resume context
+
+If the prompt contains `Checkpoint path:` and `Resume:`, this is a resumed run. Extract:
+- **Checkpoint path**, **Paused at** (e.g. `"Test Case 3, Setup Step 5: ..."`), **User's answer**
+
+Read the checkpoint file. Scan for `--- TEST CASE <N>: <name> ---` markers (where `<N>` is any integer and `<name>` is the test case name) to collect the set of already-completed test case numbers — these are skipped in Step 2.
+
+If no resume context is present, proceed normally from Step 1.
+
+## Step 1 — Read the test plan
+
+Read the test plan file and extract:
+- **All test cases**: everything under `## Test Cases`
+
+## Step 2 — Execute tests
+
+Invoke `Skill(bitwarden-playwright-testing:executing-web-tests)`. Pass:
+- **Test cases**: on a fresh run, the full content of the `## Test Cases` section from the test plan. On a resumed run, only the test cases not yet completed — exclude test case numbers in the already-completed set from Step 0 (all cases that ran before the pause), and begin the list with the resuming test case as the first entry.
+- Artifacts output dir
+- Config path: `${CLAUDE_PLUGIN_ROOT}/scripts/playwright.config.json`
+- **Resume instruction** *(resumed run only)*: `Resume: Paused at <paused-at value>. User's answer: <user's answer>.`
+
+Wait for the skill to return. The response is either a complete block ending in `=== TEST RUN COMPLETE: N total, N passed, N passed (adaptive), N failed ===`, or a partial block ending in `=== PARTIAL RUN — PAUSED ===` followed by `Need user input:`. Return the skill's output verbatim in either case — do not short-circuit while the skill is mid-run, but once it returns (with either terminal marker), return its output immediately.
+
+## Step 3 — Return results
+
+Your final response is the raw output block returned by executing-web-tests, verbatim. Do not add any preface or commentary.
+
+Your response begins with `=== TEST RUN RESULTS ===` and ends with `=== TEST RUN COMPLETE: N total, N passed, N passed (adaptive), N failed ===`. This is the same shape for fresh and resumed runs.
+
+If executing-web-tests instead returned a partial response ending with `Need user input:`, return it verbatim with no wrapping or modification — the team lead will treat it as a pause, append it to the checkpoint, and re-dispatch.
diff --git a/plugins/bitwarden-playwright-testing/references/tool-policy.md b/plugins/bitwarden-playwright-testing/references/tool-policy.md
new file mode 100644
index 0000000..2510287
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/references/tool-policy.md
@@ -0,0 +1,50 @@
+# Bitwarden Web Test Tool Policy
+
+Four categories of steps are permitted during web test planning and execution. Everything else is blocked.
+
+## Category 1 — Web UI Interactions (default)
+
+Use the `playwright-cli` skill for all interactions a user would perform in the browser. This is the default for everything, including verifying test results — if the outcome is visible in the UI, assert it via the browser, not via an API call.
+
+## Category 2 — Email Reading
+
+When a test step requires reading an email (verification links, magic links, OTP codes), use the mailcatcher reader script via Bash. The script accepts `--recipient` and `--pattern` arguments, returns the extracted URL on stdout, retries once on no-match, and exits non-zero if the email never arrives. Do not navigate to `http://localhost:1080` via playwright-cli (CORS blocks browser access).
+
+## Category 3 — External Trigger Simulation
+
+Use direct API calls (curl via Bash) only when the action is initiated by a system external to the Bitwarden application — meaning a system that is not the web vault, Admin portal, or any Bitwarden server service (e.g., the bitwarden.com marketing site, a mobile app, a third-party webhook).
+
+**The qualifying test:** Could a Bitwarden service (web vault, Admin portal, server API) initiate this action for the user? If yes, use that service instead. If no — because the initiator is truly external — then curl is appropriate.
+
+**Canonical example:** `POST /accounts/trial/send-verification-email` is called by bitwarden.com's marketing site, not by the web vault — simulating it with curl is legitimate. If the Admin portal or the web vault purchase flow can perform the action, use those instead. Document every curl call in the setup steps output with the rationale for why no Bitwarden service can initiate this step.
+
+**Examples of what is NOT Category 3:**
+- Applying a coupon to a subscription — use the Admin portal or the web vault purchase flow
+- Creating a subscription discount record — use the Admin portal
+- Setting up a paid organization — use the web vault org creation flow with a test card
+
+**Authoritative source for external trigger parameter values:** When the plan or Jira synthesis contains explicit parameter values for an external trigger request body (productTier, products, trialLength, paymentOptional, etc.), copy them verbatim. Do not substitute values derived from enum definitions found in the codebase. If your code reading conflicts with the plan value, use the plan value and annotate it: `Note: plan specifies productTier: 2. Code enum shows Teams=2, Families=1. Using plan value.`
+
+**Labeling:** Mark every Category 3 step explicitly in both the plan and the execution log:
+  EXTERNAL TRIGGER: <METHOD> <endpoint> — <one-line rationale for why no Bitwarden service can initiate this>
+
+## Category 4 — Stripe Data Queries (read-only)
+
+Use the `invoke-stripe-api` skill (or fall back to direct `stripe get` CLI commands) only to query Stripe data that cannot be obtained through the web UI — for example, listing coupon IDs needed for an Admin portal import flow. Check your available skills list first: if `invoke-stripe-api` is present, use it. If not, use `stripe get` via Bash for GET/read-only queries only.
+
+Do not use Stripe calls to set up state that the application's own flows can create.
+
+Never use Stripe for write operations (POST, PUT, DELETE) — no creating coupons, modifying subscriptions, updating customers, or any other state changes. The one exception is advancing test clocks. All other Stripe access is strictly read-only.
+
+## Never Permitted
+
+- Direct database queries
+- API calls that substitute for UI actions a user could perform in the browser
+- Using API calls to verify test results when the outcome is observable in the UI (always assert via playwright-cli instead)
+- CLI tools not related to service startup (excepted: the mailcatcher reader script per Category 2, `curl` per Category 3, and the Stripe CLI as permitted by Category 4)
+- Stripe write operations other than advancing test clocks (POST, PUT, DELETE — creating coupons, modifying subscriptions, updating customers, or any other Stripe state changes)
+- Editing feature flags or any other application configuration
+
+## Stop Condition
+
+If a step cannot be completed using any of the four permitted categories above, STOP immediately. Return a detailed report of what was completed, where the block occurred, and what approach was tried. Do not improvise or use unapproved tools.
diff --git a/plugins/bitwarden-playwright-testing/scripts/playwright.config.json b/plugins/bitwarden-playwright-testing/scripts/playwright.config.json
new file mode 100644
index 0000000..1898365
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/scripts/playwright.config.json
@@ -0,0 +1,7 @@
+{
+  "browser": {
+    "contextOptions": {
+      "ignoreHTTPSErrors": true
+    }
+  }
+}
diff --git a/plugins/bitwarden-playwright-testing/skills/build-test-cases/SKILL.md b/plugins/bitwarden-playwright-testing/skills/build-test-cases/SKILL.md
new file mode 100644
index 0000000..1aa77c8
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/build-test-cases/SKILL.md
@@ -0,0 +1,138 @@
+---
+name: build-test-cases
+description: Build structured Playwright test cases for Bitwarden web changes. Use when you have plan context (file paths, acceptance criteria, UI flows) and need to define starting URLs, interaction sequences, and screenshot checkpoints. Labels external trigger steps (flows initiated by external systems like the marketing website) so they are visible at approval time. Returns a test case list.
+---
+
+Given the plan context and Application Context (from `exploring-application-context`), build concrete test cases for Playwright execution.
+
+## Prerequisite: Application Context
+
+This skill must receive an `## Application Context` section in the prompt, produced by the `exploring-application-context` skill. The Application Context contains exactly two top-level sections: `## States` and `## Flows`. Use it to ground every test case in the actual codebase:
+
+- **Starting URLs** come from a state's `UI projection > Route` line. Do not infer URLs from Jira descriptions.
+- **Setup sequences** come from `## Flows` entries — each flow declares a `Precondition state:` and `Post-condition state:`, and the planner chains flows by matching post-conditions to required preconditions.
+- **Assertions** come from a state's `UI projection > Verification points`. Each verification point identifies the state; assert it exactly as the Application Context records it — a text-content point by its resolved text (not a container class or `data-testid`), a structure/state point by its selector.
+
+If no Application Context is present, return an error asking the caller to run `exploring-application-context` first.
+
+## Tool Policy
+
+Read `${CLAUDE_PLUGIN_ROOT}/references/tool-policy.md` for the complete four-category tool policy. Apply it throughout test case construction — every step you generate must fall into one of the four categories, Category 3 steps must carry the EXTERNAL TRIGGER label defined in the policy, and no step may query the database directly or write to Stripe (the policy's only Stripe write exception is advancing a test clock).
+
+The mailcatcher reader script is at `${CLAUDE_PLUGIN_ROOT}/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh`. Use this absolute path verbatim in any setup step that reads email.
+
+## Admin Portal
+
+The Bitwarden Admin portal at `http://localhost:62911` is a legitimate application UI for administrative test setup — creating discount records, managing subscription state, verifying users. Use it instead of direct Stripe API calls or database manipulation when it supports the entity you need.
+
+## Setup Steps
+
+Any test case that creates a user account must write the exact email address into the SETUP step. Use the format `testuser-s<N>-<YYYYMMDDHHMMSS>@example.com` where `<N>` is the test case number and `<YYYYMMDDHHMMSS>` is a timestamp generated at plan-writing time. Never use a generic placeholder or reuse the same address across test cases in the same run.
+
+Before writing any setup steps or test step sequences, read the Application Context's `## States` and `## Flows` sections. For each test case:
+
+1. **Identify the precondition state slug** the test requires (e.g., `state:authenticated-premium-user`). Find a flow in `## Flows` whose `Post-condition state(s)` includes that slug, and inline its atomic steps directly into the test case's Setup Steps. If a chain of flows is needed (e.g., signup → purchase-premium), inline each in order.
+2. **For test exercise:** find a flow whose Steps exercise the UI the test verifies → compose its steps inline with assertions inserted at the matching step's inline `- Feedback:` sub-item.
+3. **For test-case-specific steps that don't fit a named flow:** write them inline in the test case's Setup Steps list, intermixed with inlined flow steps as needed.
+
+**If the required precondition state has `Reachable by playwright: no`:** read its `Reach via:` recipe and inline each recipe line as a step in the test case. Preserve `[HUMAN]` markers verbatim. Expand any nested `Run flow:<slug>` invocations to their atomic steps (with parameter substitution baked in at plan-write time) — the test plan does not contain a shared flow definitions section, so all steps must be self-contained.
+
+  **Step placement.** Place the recipe lines at the point in the test case where the state transition occurs:
+  - **Setup Steps** when the unreachable state is the test's precondition (most common — the recipe drives the application *into* the state before the Test Steps assert it).
+  - **Test Steps** when the unreachable state is produced or driven through as part of the test exercise.
+
+  Make the placement decision at plan-write time based on the test's intent. Do not duplicate recipe steps in both sections.
+
+Repetition is acceptable. If the same multi-step sequence appears in two or more test cases, inline it in each — the test plan is generated, not maintained, so DRY across test cases isn't a goal.
+
+Only write setup steps from scratch when no named entry in `## Flows` covers the required precondition. In that case, use the Application Context's `## States` entries (routes, verification points) to identify the right mechanism and break it into individual atomic actions (navigate, fill, click, wait for response).
+
+`Setup Steps:` is mandatory whenever the precondition state requires any of:
+- Navigating the Admin portal to create/import records (e.g., subscription discounts, organization seats) — typically uses `flow:authenticate-admin-portal` plus inline admin actions
+- Querying Stripe for coupon/discount IDs (Category 4 read-only calls)
+- Creating a user account or registering a new organization — typically `flow:create-new-user-and-login`, optionally followed by `flow:purchase-premium-subscription` or `flow:create-paid-org`
+- Purchasing a subscription or plan upgrade via the web vault checkout flow
+
+For these cases, every action needed to reach that state must appear as a numbered `SETUP:` step. The executor will run these steps before the Test Steps begin.
+
+If no precondition requires active setup (e.g., the test case tests a page that is visible to any authenticated user without prior state changes), omit the `Setup Steps:` block entirely.
+
+## Test Case Construction
+
+For each test case, define:
+1. **Starting URL** — from Application Context
+2. **Sequence of interactions** (click, fill, navigate, assert)
+3. **Pass/fail criteria** — what constitutes a pass vs. a failure
+4. **Descriptive name** — used in the report
+
+Be specific:
+- **Good (interaction)**: "Navigate to `https://localhost:8080/#/vault`, click the '+ New Item' button"
+- **Good (assertion)**: "Assert: `[data-testid='discount-section']` — exactly 2 elements | Fail: 0 elements found (server may not be returning Discounts array)"
+- **Bad**: "Verify discounts are shown"
+
+**Exploit the Application Context fully.** Every step and assertion must be grounded in the specific details the Application Context provides — do not paraphrase or generalize when exact information is available:
+
+- **Interaction steps**: Use the exact URL from "UI projection > Route." Name the specific button label, form field, or control. Don't write "fill in payment details" — write "fill the Stripe card number iframe (`[title='Secure card number input frame']`) with `4242424242424242`."
+
+- **Assertion steps**: Use the exact `Selector value` and `Selector type` from "UI projection > Verification points" — a `data-testid`, CSS selector, element role, or `text`. When the observable is **text content** (a message, a localized or computed term, a relabeled control — anything whose point is that the right text renders), the verification point's `Selector type` is `text`: assert the resolved text substring. When the substring is short or could occur elsewhere on the page, keep `Selector type: text` but scope the read to the nearest stable region named in the point's `Source:` rather than searching the whole page; the region only bounds the search and is never the asserted value (never a container class or `data-testid`). When the observable is **structure/state** (count, visible/hidden, enabled/disabled, element presence), assert via the selector. Each assertion must state:
+  1. The selector or text being queried (e.g., `[data-testid="discount-section"]`, or text `"A cohort with this name already exists."`)
+  2. The expected count, text, or state (e.g., "exactly 2 elements", "text contains '-$5.00'")
+  3. What a failure looks like (e.g., "0 elements — server not returning Discounts array")
+
+- **All verification points must appear**: Every item under "UI projection > Verification points" in the Application Context must map to at least one explicit assertion step. If a verification point has no corresponding assertion, the test case is incomplete.
+
+**Interactive elements must be exercised.** When the plan describes collapsible sections, accordions, tabs, expandable cards, or modal triggers, each must have a dedicated step that:
+1. Performs the interaction (click to expand, open tab, trigger modal)
+2. Asserts the content *inside* is correct
+
+Verifying that a header or trigger is visible is not sufficient — the hidden content must also be verified.
+
+**Realistic user paths only.** Every step must be something a real user can do through the UI. A test case may **not** use a DOM bypass — writing to a hidden or disabled field, or otherwise bypassing the form's own change handlers — to construct a precondition. If a scenario can only be staged that way, it is not a valid test case — drop it. (Example of an invalid case: filling a coupon field that the form hides and blanks in the current state — the field is not user-editable, so there is no real user path to that input.)
+
+**Assert states only as the Application Context licenses them.** Build assertions solely from the verification points the Application Context records for a state. Do not synthesize a new initial-state or default-state assertion by re-interpreting a conditional. If the context records "element hidden when X" but does not record "element hidden on initial load," do not assert the initial-load case — the default branch may not satisfy the condition. If the context did not record a point as observable in a state, do not assert it visible in that state.
+
+**No un-grounded test cases.** Every test case must trace to a state, flow, or verification point in the Application Context — its Starting URL, steps, and assertions all come from what the context records. Do not invent a test case for a behavior the context does not model, and do not guess a selector, URL, or query parameter the context did not provide. If a behavior that should be tested is not modeled in the Application Context, surface it as a gap — add a `Notes: Coverage gap — <behavior> not modeled in the Application Context` line on the test case whose scenario is closest to the missing behavior, so a reviewer can spot it for a follow-up context pass.
+
+## Billing Prerequisites Check
+
+Scan the plan's features, acceptance criteria, and file paths for billing signals:
+- Billing operations: subscriptions, Secrets Manager add-on, plan upgrades, payment methods
+- API endpoints containing `subscribe`, `billing`, `payment`, `upgrade`, or `secrets-manager`
+- UI flows navigating to billing settings, subscription pages, or Secrets Manager enablement
+
+If any match, read
+`${CLAUDE_SKILL_DIR}/references/billing-test-data.md`
+before constructing any billing-related test cases, and incorporate the Stripe card numbers,
+iframe selectors, and discount eligibility details directly into the relevant test-case steps.
+
+## Output
+
+Emit a single markdown document with this exact structure. The first non-empty line must be the `## Test Cases` heading — downstream agents anchor on it positionally and shape-validate the response.
+
+```
+## Test Cases
+
+<one block per test case, see Test Cases format below>
+```
+
+Do not preface this document with any narrative or commentary. The entire output is the artifact, beginning with `## Test Cases`.
+
+**Test Cases format** — one block per test case:
+
+```
+**Test Case N: <name>**
+- Starting URL: <exact URL from a state's UI projection Route in the Application Context>
+- Precondition: <one-line summary of the required end-state in plain English (e.g., "A premium user with two active discounts is logged in")>
+- Setup Steps:
+  1. SETUP: <atomic browser interaction, navigation, fill, or click>
+  2. SETUP: [HUMAN] <description of the human action required, e.g. "Attach a Stripe test clock to the subscription">
+  3. SETUP: Use the <skill-name> skill to <task>
+  4. SETUP: Inspect <path> for <pattern>
+  ...
+- Test Steps:
+  1. <atomic browser interaction or navigation>
+  2. Assert: <selector> — <expected value/count/state> | Fail: <what failure looks like>
+  3. [HUMAN] <description of the human action required, when the test exercise itself includes one>
+  ...
+- Notes: <optional free-form notes>
+```
diff --git a/plugins/bitwarden-playwright-testing/skills/build-test-cases/references/billing-test-data.md b/plugins/bitwarden-playwright-testing/skills/build-test-cases/references/billing-test-data.md
new file mode 100644
index 0000000..93669b7
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/build-test-cases/references/billing-test-data.md
@@ -0,0 +1,48 @@
+# Billing Test Data Reference
+
+## Stripe Reference Data
+
+### Stripe Test Mode
+
+The local dev environment is configured with a **Stripe test mode API key** (`server/dev/secrets.json`). This means:
+
+- The web vault payment form renders live Stripe Elements iframes that accept Stripe test card numbers
+- Submitting the form makes real Stripe API calls in test mode — no actual charges occur
+- A successful subscription purchase produces a real Stripe Customer and Subscription in test mode
+
+### Stripe Test Card Numbers
+
+| Card Number | Brand | Use |
+|---|---|---|
+| `4242 4242 4242 4242` | Visa | **Default — always succeeds** |
+| `5555 5555 5555 4444` | Mastercard | Alternative success |
+
+For all test cards: any future expiry (e.g., `12/29`), any 3-digit CVC (e.g., `123`), postal code `12345`.
+
+Do NOT use decline-trigger cards (e.g., `4000 0000 0000 0002`) for setup — those are for testing failure paths, not creating a working subscription.
+
+### Payment Form Iframe Selectors
+
+> **Critical**: Stripe Elements payment fields are iframes embedded within the page. Use Playwright's `frameLocator` to target them — `fill()` on the outer page will not reach the Stripe input fields.
+
+- Card number: `frameLocator('[title="Secure card number input frame"]')`
+- Expiry: `frameLocator('[title="Secure expiration date input frame"]')`
+- CVC: `frameLocator('[title="Secure CVC input frame"]')`
+
+---
+
+## Billing Policies
+
+### How Personal Discounts Work
+
+Personal subscription discounts are **Stripe coupons** that have been imported into the Bitwarden application. The checkout flow reads available discounts from the database and automatically applies any coupon the user is eligible for when the subscription is created — no manual application is needed or allowed.
+
+Discounts apply to Premium personal subscriptions and Families organization plans only (not Teams, Enterprise, or other org tiers).
+
+**Critical:** Discounts are never added to a Stripe customer or subscription directly via the Stripe API, the Admin portal, or any other mechanism. The only supported path is:
+1. A coupon exists in Stripe (created there)
+2. The coupon is imported into the Admin application for use in the Bitwarden discounts system
+3. The user is eligible for that discount
+4. The checkout flow applies it automatically during subscription creation
+
+Never generate test steps that call the Stripe API to attach a coupon to a customer or subscription — that is not how the application works and would produce test state that does not reflect production behavior. To test discount display, look up the coupon ID in Stripe (Category 4 — read-only) and confirm that coupon has been imported into the Admin application, then complete the premium purchase flow through the web UI; the discount will appear if the user is eligible.
diff --git a/plugins/bitwarden-playwright-testing/skills/compiling-test-report/SKILL.md b/plugins/bitwarden-playwright-testing/skills/compiling-test-report/SKILL.md
new file mode 100644
index 0000000..ae165ce
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/compiling-test-report/SKILL.md
@@ -0,0 +1,74 @@
+---
+name: compiling-test-report
+description: Compile an HTML test report from Playwright agent results for Bitwarden web tests. Use after executing web tests to produce a structured report with per-test-case pass/fail status, screenshot links, and issue summaries. Uses templates/report-template.html. Returns the HTML document as text — the caller persists it.
+---
+Given the Playwright agent results and services-tested list, produce the complete HTML report document as your output. You do not write any files and you do not name the file — the caller persists the content you return.
+
+The test results, screenshot paths, and pass/fail data all come from the `executing-web-tests` skill output — use them directly.
+
+## HTML Report
+
+Use the structure in `${CLAUDE_SKILL_DIR}/templates/report-template.html`. Fill in:
+
+- **Header**: date, plan file path, services tested (with ports), base URL
+- **Summary table**: total / passed / passed (adaptive) / failed / errors counts — one value per column in the template's summary table
+- **Test Results section**: one subsection per test case. Parse each `--- TEST CASE N: <name> ---` block (see "Rendering steps and screenshots" below) and render: status, URL (derived from the first navigate step), **Setup Steps** and **Test Steps** as two separate numbered lists, each step's screenshot inline, notes, and a suggested fix for any failure
+- **Issues Summary**: bullet list of all failures and errors
+- **Recommendations**: follow-up actions (Fix, Investigate, Re-test)
+
+Screenshot paths in the report use relative paths from the report file location — just `screenshots/filename.png`, not the full absolute path.
+
+Each screenshot is rendered as a linked thumbnail, placed inline inside its step's `<li>` (see "Rendering steps and screenshots" below):
+
+```html
+<a class="screenshot-link" href="screenshots/filename.png" target="_blank">
+  <img src="screenshots/filename.png" alt="description">
+</a>
+```
+
+The `.screenshot-link img` CSS rule in the template sets `width: 50%` — do not add inline `style` attributes to the `<img>` tags. Thumbnails link to the full-size image in a new tab.
+
+## Rendering steps and screenshots
+
+Each test case block has an optional `Setup Steps:` label and a `Test Steps:` label, followed by lines like `Setup Step N: <text> — <outcome>` and `Step N: <text> — <outcome>` respectively. Render them as two separate numbered lists, each under its own header:
+
+```html
+<p><strong>Setup Steps</strong>:</p>
+<ol>
+  <li>Navigate to … — PASS</li>
+</ol>
+<p><strong>Test Steps</strong>:</p>
+<ol>
+  <li>Click Tools dropdown — PASS</li>
+</ol>
+```
+
+- Omit the Setup Steps header and its list entirely when the block has no `Setup Steps:` section.
+- An indented `  Screenshot: <filename>` line belongs to the step on the line directly above it. Render the thumbnail **inside that step's `<li>`**, after the step text:
+
+```html
+<li>Click Tools dropdown — PASS
+  <a class="screenshot-link" href="screenshots/test-case-1-step-3-….png" target="_blank">
+    <img src="screenshots/test-case-1-step-3-….png" alt="test-case-1-step-3">
+  </a>
+</li>
+```
+
+- The URL shown in the test case header comes from the first step whose text begins `Navigate to`.
+- Do not emit a separate Screenshots section.
+- A step line beginning with `[HUMAN]` renders as `<li class="human-step">…</li>`.
+
+## Adaptive status rendering
+
+When a test case has `PASS (adaptive)` status:
+
+- Render its status line as `⚠️ PASS (adaptive)` — use the amber warning symbol, not the green ✅
+- Do NOT include it in the Issues Summary section (it is a pass, not a failure)
+- In the Recommendations section, add a bullet for each adaptive test case:
+  `Update test plan: TC<N> asserted <what was specified> — actual rendering is <what was found>. Update the assertion in the test plan to match.`
+
+When there are no adaptive test cases, omit any mention of them from the Recommendations section.
+
+## Output
+
+Return a single fenced ```html``` block containing the full HTML document (populated from `${CLAUDE_SKILL_DIR}/templates/report-template.html`). No other text — the entire response is the fenced block.
diff --git a/plugins/bitwarden-playwright-testing/skills/compiling-test-report/templates/report-template.html b/plugins/bitwarden-playwright-testing/skills/compiling-test-report/templates/report-template.html
new file mode 100644
index 0000000..41fbe51
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/compiling-test-report/templates/report-template.html
@@ -0,0 +1,149 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Web Test Report: <Plan Name></title>
+  <style>
+    body {
+      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+      max-width: 960px;
+      margin: 0 auto;
+      padding: 2rem;
+      color: #24292e;
+      line-height: 1.6;
+    }
+    h1 { font-size: 2em; border-bottom: 1px solid #e1e4e8; padding-bottom: 0.5rem; margin-bottom: 1rem; }
+    h2 { font-size: 1.5em; border-bottom: 1px solid #e1e4e8; padding-bottom: 0.3rem; margin-top: 2rem; }
+    h3 { font-size: 1.25em; margin-top: 1.5rem; }
+    hr { border: none; border-top: 1px solid #e1e4e8; margin: 2rem 0; }
+    table { border-collapse: collapse; }
+    th, td { border: 1px solid #dfe2e5; padding: 6px 13px; text-align: left; }
+    th { background: #f6f8fa; font-weight: 600; }
+    code { background: #f6f8fa; padding: 2px 5px; border-radius: 3px; font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace; font-size: 0.9em; }
+    ol, ul { padding-left: 2rem; }
+    li { margin: 0.25rem 0; }
+    p { margin: 0.5rem 0; }
+    .screenshot-link { display: inline-block; margin: 8px 0; }
+    .screenshot-link img {
+      width: 50%;
+      max-width: 100%;
+      display: block;
+      border: 1px solid #e1e4e8;
+      border-radius: 4px;
+    }
+    .screenshot-link:hover img { border-color: #0366d6; }
+    .human-step {
+      border-left: 3px solid #f0ad4e;
+      background: #fff8e1;
+      padding: 0.2rem 0 0.2rem 8px;
+    }
+  </style>
+</head>
+<body>
+
+<h1>Web Test Report: <Plan Name></h1>
+
+<p><strong>Date</strong>: <YYYY-MM-DD HH:mm></p>
+<p><strong>Plan File</strong>: <code>.playwright-testing-artifacts/<slug>/test-plan.md</code></p>
+<p><strong>Services Tested</strong>: <comma-separated list, e.g. Web (8080), Api (4000), Identity (33656)></p>
+<p><strong>Base URL</strong>: <code>&lt;https://localhost:PORT&gt;</code></p>
+
+<hr>
+
+<h2>Summary</h2>
+
+<table>
+  <tr><th>Total</th><th>Passed</th><th>Passed (Adaptive)</th><th>Failed</th><th>Errors</th></tr>
+  <tr><td>N</td><td>N</td><td>N</td><td>N</td><td>N</td></tr>
+</table>
+
+<hr>
+
+
+<h2>Test Results</h2>
+
+<h3>Test Case 1: <Test Case Name></h3>
+
+<p><strong>Status</strong>: ✅ PASS / ⚠️ PASS (adaptive) / ❌ FAIL / ⚠️ ERROR</p>
+<p><strong>URL</strong>: <code>https://localhost:PORT/path</code></p>
+
+<!-- Omit the Setup Steps header + list entirely when the test case has no setup steps. -->
+<p><strong>Setup Steps</strong>:</p>
+<ol>
+  <li>Navigated to login page — PASS
+    <!-- Screenshot inline, INSIDE the <li>, after the step text. One per visual state change. -->
+    <a class="screenshot-link" href="screenshots/setup-tc-1-step-1-20240101-0930.png" target="_blank">
+      <img src="screenshots/setup-tc-1-step-1-20240101-0930.png" alt="setup-tc-1-step-1">
+    </a>
+  </li>
+  <!-- [HUMAN] steps get class="human-step" (amber/yellow). -->
+  <li class="human-step">Attach a Stripe test clock to the subscription — COMPLETED (User: done)</li>
+</ol>
+
+<p><strong>Test Steps</strong>:</p>
+<ol>
+  <li>Navigated to URL — PASS</li>
+  <li>Clicked "..." — PASS
+    <a class="screenshot-link" href="screenshots/test-case-1-step-2-20240101-0930.png" target="_blank">
+      <img src="screenshots/test-case-1-step-2-20240101-0930.png" alt="test-case-1-step-2">
+    </a>
+  </li>
+  <li>Asserted "..." was visible — PASS</li>
+</ol>
+
+<p><strong>Notes</strong>: Any observations about behavior, timing, unexpected UI state, etc.</p>
+
+<p><strong>Suggested fix</strong> <em>(if failed)</em>: Description of what likely needs to change and where.</p>
+
+<hr>
+
+<h3>Test Case 2: <Test Case Name></h3>
+
+<p><strong>Status</strong>: ✅ PASS / ⚠️ PASS (adaptive) / ❌ FAIL / ⚠️ ERROR</p>
+<p><strong>URL</strong>: <code>https://localhost:PORT/path</code></p>
+
+<p><strong>Test Steps</strong>:</p>
+<ol>
+  <li>... — PASS
+    <a class="screenshot-link" href="screenshots/test-case-2-step-1-20240101-0930.png" target="_blank">
+      <img src="screenshots/test-case-2-step-1-20240101-0930.png" alt="test-case-2-step-1">
+    </a>
+  </li>
+</ol>
+
+<p><strong>Notes</strong>: ...</p>
+
+<p><strong>Suggested fix</strong> <em>(if failed)</em>: ...</p>
+
+<hr>
+
+<!-- Repeat for each test case -->
+
+<hr>
+
+<h2>Issues Summary</h2>
+
+<!-- List all failures and errors with brief descriptions -->
+
+<ul>
+  <li>❌ Test Case N: <brief description of failure></li>
+  <li>⚠️ Test Case N: <brief description of error></li>
+</ul>
+
+<p><em>Leave blank if all test cases passed.</em></p>
+
+<hr>
+
+<h2>Recommendations</h2>
+
+<!-- Suggested follow-up actions based on test results -->
+
+<ul>
+  <li>Fix: ...</li>
+  <li>Investigate: ...</li>
+  <li>Re-test: ...</li>
+</ul>
+
+</body>
+</html>
diff --git a/plugins/bitwarden-playwright-testing/skills/determining-required-services/SKILL.md b/plugins/bitwarden-playwright-testing/skills/determining-required-services/SKILL.md
new file mode 100644
index 0000000..272ef9e
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/determining-required-services/SKILL.md
@@ -0,0 +1,33 @@
+---
+name: determining-required-services
+description: Determine which Bitwarden local development services are required for a given set of routes and the current branch diff. Use this skill when given the routes the tests will navigate to (extracted from an Application Context's ## States section). The skill runs its own `git diff --name-only`, consults references/services.md, and returns the union of services required by route-based dependencies and file-path-based dependencies. Returns service names with their URLs and ports.
+---
+Given the routes the tests will navigate to AND the affected repos, determine which local services are required to run web tests. The skill runs its own `git diff --name-only origin/main...HEAD -- <repo-path>` against each affected repo to obtain the changed file list, then consults `${CLAUDE_SKILL_DIR}/references/services.md` for the dependency map.
+
+## Inputs
+
+- **Routes:** list of URLs the tests will navigate to (typically extracted from an Application Context's `## States` section by the calling agent).
+- **Affected repos:** the same repos passed to `exploring-application-context` — used as scope for `git diff`.
+
+## Procedure
+
+1. For each affected repo, run `git diff --name-only origin/main...HEAD -- <repo-path>` and collect the resulting file paths.
+2. For each file path, match against the `Required by:` clauses in `references/services.md` to determine which services that file's change requires.
+3. For each route, match against the route-based `Required by:` clauses in `references/services.md` to determine which services that route requires.
+4. Take the union of services from steps 2 and 3.
+5. If the union is empty (e.g., the diff touches no path listed in `references/services.md` and no routes were supplied), fall back to the Web vault frontend + Api + Identity baseline.
+6. Identify the primary test URL — the web vault (`https://localhost:8080`) when any web vault route is present, otherwise the Bitwarden Portal (`http://localhost:62911`) when only Admin routes are present.
+
+## Output
+
+Return the output as a markdown block whose first non-empty line is the literal heading `## Required Services`. Below that heading, list each required service as a bullet with name, URL, and port. Clearly note the **primary test URL** since it drives the render verification step.
+
+Example:
+
+```markdown
+## Required Services
+
+- Api — `http://localhost:4000` (port 4000)
+- Identity — `http://localhost:33656` (port 33656)
+- Web — `https://localhost:8080` (port 8080) **(primary test URL)**
+```
diff --git a/plugins/bitwarden-playwright-testing/skills/determining-required-services/references/services.md b/plugins/bitwarden-playwright-testing/skills/determining-required-services/references/services.md
new file mode 100644
index 0000000..ea85e8e
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/determining-required-services/references/services.md
@@ -0,0 +1,87 @@
+# Bitwarden Service Dependency Reference
+
+## Resolving `<bitwarden git root>`
+
+`<bitwarden git root>` is the directory containing the Bitwarden `clients/` and `server/` checkouts — typically the current working directory. When resolving a repo path below, first try it relative to the current working directory. If it isn't there, attempt to locate the repo using your own reasoning (e.g. check nearby directories). If it still can't be found, **stop and alert the user that the repository folder could not be located** rather than guessing.
+
+## Service Map
+
+### Web Vault Frontend
+- **Port**: 8080
+- **URL**: `https://localhost:8080`
+- **Technology**: Angular (NX/Webpack)
+- **Repo**: `<bitwarden git root>/clients/`
+- **Health check**: `https://localhost:8080` (200 response)
+- **Required by**: any change to `clients/apps/web/**` or `clients/libs/**`, and any server-side API change that surfaces in the web UI
+
+### Api Service
+- **Port**: 4000
+- **URL**: `http://localhost:4000`
+- **Technology**: .NET
+- **Repo**: `<bitwarden git root>/server/src/Api/`
+- **Health check**: `http://localhost:4000/alive`
+- **Required by**: web vault testing (handles vault data), any `server/src/Api/**` change
+
+### Identity Service
+- **Port**: 33656
+- **URL**: `http://localhost:33656`
+- **Technology**: .NET
+- **Repo**: `<bitwarden git root>/server/src/Identity/`
+- **Health check**: `http://localhost:33656/alive`
+- **Required by**: any flow involving login/authentication; always required alongside Api for web vault testing
+
+### Bitwarden Portal
+- **Port**: 62911
+- **URL**: `http://localhost:62911`
+- **Technology**: .NET Razor views (NOT Angular)
+- **Repo**: `<bitwarden git root>/server/src/Admin/`
+- **Health check**: `http://localhost:62911` (200 response)
+- **Required by**: `server/src/Admin/**` changes only
+- **Note**: The Bitwarden Portal is a standalone .NET web app. No frontend build is needed. Playwright navigates directly to port 62911.
+
+### Billing Service
+- **Port**: 44519
+- **URL**: `http://localhost:44519`
+- **Technology**: .NET
+- **Repo**: `<bitwarden git root>/server/src/Billing/`
+- **Health check**: `http://localhost:44519/alive`
+- **Required by**: `server/src/Billing/**` changes, billing UI flows
+
+### billing-pricing Service
+- **Port**: 7088 (HTTPS), 5082 (HTTP)
+- **URL**: `https://localhost:7088`
+- **Technology**: .NET
+- **Repo**: `<bitwarden git root>/billing-pricing/`
+- **Health check**: `http://localhost:5082/alive` (200 response) — use HTTP; the HTTPS port (7088) has SSL errors in dev
+- **Required by**: `billing-pricing/src/**` changes only — never triggered by routes or pricing UI flows
+- **Note**: Separate repo — does not share `Bitwarden.sln`. Does not need the pre-build step and does not use `--no-build`. Most developers use a QA cloud environment for pricing; only require this service when the billing-pricing repo has local code changes on the branch.
+
+---
+
+## Optional Infrastructure Services
+
+These services are **not required to start upfront** but may be needed if tests fail with errors suggesting a dependent service is unavailable. Start them on demand when you observe that failure.
+
+### Notifications Service
+- **Port**: 61840
+- **URL**: `http://localhost:61840`
+- **Technology**: .NET
+- **Repo**: `<bitwarden git root>/server/src/Notifications/`
+- **Health check**: `http://localhost:61840` (200 response)
+- **Start if**: tests fail with real-time sync errors, push notification failures, or vault sync not reflecting changes
+
+### Events Service
+- **Port**: 46273
+- **URL**: `http://localhost:46273`
+- **Technology**: .NET
+- **Repo**: `<bitwarden git root>/server/src/Events/`
+- **Health check**: `http://localhost:46273` (200 response)
+- **Start if**: tests fail involving audit logs, organization event history, or event recording flows
+
+### Icons Service
+- **Port**: 50024
+- **URL**: `http://localhost:50024`
+- **Technology**: .NET
+- **Repo**: `<bitwarden git root>/server/src/Icons/`
+- **Health check**: `http://localhost:50024` (200 response)
+- **Start if**: tests fail involving favicon/icon display for vault items, or icon-related network errors appear in the browser console
diff --git a/plugins/bitwarden-playwright-testing/skills/executing-web-tests/SKILL.md b/plugins/bitwarden-playwright-testing/skills/executing-web-tests/SKILL.md
new file mode 100644
index 0000000..366a699
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/executing-web-tests/SKILL.md
@@ -0,0 +1,220 @@
+---
+name: executing-web-tests
+description: Execute Bitwarden web test cases step-by-step using the playwright-cli skill directly. Use after test cases are defined and services are running. Governs tool policy, screenshot naming, toast capture, Setup Steps execution, and the billing blocker policy.
+---
+
+Given the test cases, artifacts output dir, and the absolute path to `scripts/playwright.config.json`, execute the tests yourself by calling `Skill(playwright-cli)` for each individual browser action.
+
+## Before you start
+
+### Resolve these values from your inputs
+
+| Value                | Source                                                                        |
+| -------------------- | ----------------------------------------------------------------------------- |
+| Screenshot directory | `<artifacts-output-dir>/screenshots/` |
+| Config path          | The absolute path to `scripts/playwright.config.json` you received as input   |
+| Timestamp            | Generate once now as `YYYYMMDD-HHmm` and reuse across all screenshots         |
+
+### Read the tool policy
+
+Read `${CLAUDE_PLUGIN_ROOT}/references/tool-policy.md`. This governs which tools you may use throughout the run. Follow it without exception.
+
+### Billing blocker policy
+
+If any billing-related 400 error is encountered during setup or test-case execution, stop immediately, do not continue testing, and report the entire run as FAIL with the exact error before any partial completion is reported.
+
+### Resume context (conditional)
+
+Only when a `Resume:` block is present in your inputs: extract and hold:
+- **Paused at** — the location string identifying the `[HUMAN]` step, e.g. `"Test Case 3, Setup Step 5: Attach a Stripe test clock"`
+- **User's answer** — to apply to subsequent steps that reference the `[HUMAN]` step's result
+
+For the resuming test case (your caller always passes remaining test cases starting with the paused one, so this is the first test case in your input), before executing any of its steps:
+1. Open the browser fresh: `playwright-cli open --config=<config-path>` (always first, same as any run)
+2. Re-establish browser session using credentials from that test case's SETUP steps in the test plan
+3. Start from the step immediately after the `[HUMAN]` step identified by "Paused at", applying the user's answer to any steps that reference it
+
+All subsequent test cases run fully and normally from their first step.
+
+If the `[HUMAN]` step was the last step of the resuming test case (no test steps follow it within that case), record the test case result using the user's answer as the outcome of that step, then proceed to subsequent test cases or produce `=== TEST RUN COMPLETE ===` if none remain.
+
+This protocol repeats for each `[HUMAN]` step encountered in a run — a second pause in a resumed run uses the same partial-emit and signal format.
+
+A `Resume:` block in your inputs looks like:
+```
+Resume: Paused at <location string>. User's answer: <answer>.
+```
+
+## Step 1 — Initialize the browser session
+
+Before any navigation, open the browser with the custom config to disable SSL certificate errors. This must be the first `playwright-cli` call — all subsequent interactions inherit this session:
+
+```
+Skill(playwright-cli): open --config=<config-path>
+```
+
+## Step 2 — Run setup and authentication
+
+Any login, magic-link flow, or account/org creation required before the first test case is **setup**, not part of a test case.
+
+- Use `setup-{description}-{timestamp}.png` screenshot names during setup (e.g., `setup-login-complete-20260409-2057.png`)
+- Apply the same "screenshot every visual state change" rule as during test cases (see Step 3)
+- Record everything done: account email/password, org created, billing performed, email verifications followed, and any step that failed
+
+## Step 3 — Execute test cases
+
+Work through every test case in order. For each test case:
+
+### 3a — Run Setup Steps first (if any)
+
+Some test cases contain lines labeled `SETUP:`. Execute all of them before any Test Steps.
+
+- Use `setup-tc-N-step-M-{timestamp}.png` screenshot names (N = test case number, M = setup step number)
+- If any SETUP step fails — including any HTTP 4xx or 5xx response — stop immediately:
+  1. Do NOT retry or modify parameters
+  2. Mark the test case FAILED with the setup failure as the reason
+  3. Do NOT proceed to Test Steps or subsequent test cases
+  4. Put the exact request and response body in `Notes:`
+
+### 3b — Run Test Steps
+
+After all SETUP steps complete, execute the Test Steps.
+
+- Use `test-case-N-step-M-{timestamp}.png` screenshot names
+- Assert each step's expected outcome and record PASS or FAIL
+
+### Test case block format
+
+Every test case block — in the run-complete output (Step 4) and in the partial output emitted at a `[HUMAN]` halt — uses this exact shape:
+
+```
+--- TEST CASE N: <name> ---
+Status: <PASS | PASS (adaptive) | FAIL | ERROR>
+Setup Steps:
+Setup Step 1: <description> — <PASS | FAIL>
+  Screenshot: setup-tc-N-step-1-<timestamp>.png
+[HUMAN] Setup Step M: <description> — COMPLETED (User: <answer>)
+Test Steps:
+Step 1: <description> — <PASS | FAIL>
+  Screenshot: test-case-N-step-1-<timestamp>.png
+Step 2: Assert <selector/condition> — <PASS | FAIL> (<what was actually observed>)
+Notes: <notes, if any>
+--- END TEST CASE N ---
+```
+
+- `Status:` is the first line inside the block, immediately after the `--- TEST CASE N: <name> ---` header line.
+- Omit the entire `Setup Steps:` section when the test case has no setup steps.
+- A `  Screenshot: <filename>` line (two-space indent) goes on the line immediately after the step it documents — one line per screenshot, only when that step produced a visual change. Do not collect screenshots into a trailing list.
+- For an assertion step, append what you actually observed in parentheses after the outcome — e.g. `Step 4: Assert ".badge.bg-secondary" visible — PASS (badge text: "Inactive")`.
+- `[HUMAN]` prefixes a human-completed step; its outcome is `COMPLETED (User: <answer>)`.
+- Omit `Notes:` when there is nothing to note.
+
+### Adaptive assertion evaluation
+
+After any assertion step fails, apply this evaluation before recording the result — using only what you already observed during normal execution:
+
+1. Review page content, visible text, error messages, and element content already in your context and screenshots. Do NOT issue additional browser calls.
+2. Ask: "Is the semantic condition this assertion was checking demonstrably present in what I already observed?" The semantic condition is the underlying behavior or content the test intends to verify, independent of the specific CSS selector or element path the plan specified.
+3. Apply the rule to each failed assertion individually:
+   - If **all** failed assertions resolve adaptively → record the test case as `PASS (adaptive)`
+   - If **any** failed assertion represents a genuine failure → record `FAIL`; document the adaptive assessments for the resolved assertions in Notes
+4. When recording `PASS (adaptive)`, write in Notes:
+   - What the plan's assertion specified
+   - What was actually found
+   - Why the semantic condition is considered met
+5. Do NOT apply adaptive evaluation when:
+   - The feature behavior itself is wrong (e.g., the server accepted input it should have rejected)
+   - The expected content or behavior is genuinely absent from the page
+   - The test could not run due to environment state (dirty database, missing seed data, skipped `[HUMAN]` step)
+   - The failed assertion was a URL/navigation check (wrong URL always means wrong behavior)
+
+### Screenshot policy
+
+Call `Skill(playwright-cli)` to take a full-page screenshot **after every visual state change** — no exceptions:
+
+- After navigating to a new page or URL
+- After a modal, dialog, or overlay opens or closes
+- After a checkbox, toggle, accordion, or other element reveals or hides content
+- After a form is submitted and a result or error appears
+- After a toast or notification appears — capture immediately before it auto-dismisses (toasts typically last 2-5 seconds, but some dismiss in well under a second — see "Asserting transient toasts" below). Watch for up to 3 seconds after any state-changing action; if no toast appears, continue
+
+Always save screenshots in the artifact output directory and pass `--full-page`: `screenshot --filename=<artifacts-output-dir>/screenshots/<name>.png --full-page`
+
+Do NOT screenshot after: `run-code`, `eval`, `console`, `cookie-get`, or any pure-inspection action; or a step where nothing visible changed.
+
+When in doubt, take the screenshot. A redundant screenshot costs nothing; a missing one cannot be recovered.
+
+### Asserting transient toasts
+
+Toasts can auto-dismiss in well under a second. To capture toast text reliably, read it from the live DOM: use `playwright-cli eval` to read the toast region's text right after the action, or `playwright-cli run-code` to wait for the toast region and return its text (arm the wait together with the triggering action so a short-lived toast is caught as it renders).
+
+When the action causes a full page reload (the server-rendered Admin Portal — ASP.NET MVC), the new page fires the toast from an inline `document.ready` script, so the action's promise resolves before the toast renders and arming a wait alongside the action cannot catch it. For this post-back case, read the toast from the new page instead: assert its text from the inline `toastr.*("...")` call in the page source, or read the toast node on the new page's load.
+
+### Continuity rule
+
+External trigger results (curl responses), email reads, and URL extractions are intermediate working steps — not stopping points. After each, proceed immediately to the next test step.
+
+For email-driven flows (verification, magic-link login, trial activation, OTP), call the mailcatcher reader script directly via Bash:
+
+```
+${CLAUDE_PLUGIN_ROOT}/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh --recipient <email> --pattern <subject-keyword>
+```
+
+stdout is the URL — use it as input to the next browser step. The script already retries once on `NO_MATCH`; a non-zero exit after the retry is a hard failure — mark the test case FAIL immediately with the `NO_MATCH` diagnostic in Notes. Do not attempt to read Mailcatcher via any other means (curl, direct API calls, or sub-agent). Do not invoke `Skill(reading-mailcatcher-api)` (it is documentation for the underlying API; the co-located script is the only sanctioned transport).
+
+### Human step halt
+
+When executing any step (Setup or Test) whose text begins with `[HUMAN]`, halt immediately. Do not retry, infer, or skip.
+
+Before returning, emit all completed test-case blocks using the Test case block format defined in Step 3, close the output with the `=== PARTIAL RUN — PAUSED ===` marker, then append the `Need user input:` signal as the very last line:
+
+```
+=== TEST RUN RESULTS ===
+
+SUMMARY: <N completed in this segment> test cases | N passed | N passed (adaptive) | N failed
+
+--- TEST CASE N: <name> ---
+[emit completed test case block using the Test case block format defined in Step 3]
+--- END TEST CASE N ---
+
+[one block per completed test case, in order]
+
+=== PARTIAL RUN — PAUSED ===
+
+Need user input: <step text after the [HUMAN] marker, verbatim, with location context — e.g. "Test Case 1, Setup Step 8: Attach a Stripe test clock to the subscription.">
+```
+
+Rules:
+- `SUMMARY:` reflects only test cases completed in this segment.
+- If zero test cases have completed yet, write `SUMMARY: 0 test cases | 0 passed | 0 passed (adaptive) | 0 failed` and omit the test case blocks.
+- `=== PARTIAL RUN — PAUSED ===` is the segment delimiter and replaces `=== TEST RUN COMPLETE ===` on a pause.
+- `Need user input:` is always the very last line of the response.
+- Do not produce `=== TEST RUN COMPLETE ===` on a pause.
+
+You are not done with a test case until both SETUP steps and Test Steps are complete with PASS or FAIL recorded. You are not done with the run until all test cases are complete and the `=== TEST RUN COMPLETE ===` marker is produced.
+
+## Step 4 — Produce the required output
+
+Do not return until every test case has a complete block. The `=== TEST RUN COMPLETE ===` line may only appear after all blocks.
+
+Before writing the output block, run:
+
+```bash
+ls <screenshot-dir> | grep '<timestamp>'
+```
+
+This gives you the ground-truth list of screenshots this run actually wrote. For each test case N, files whose names contain `test-case-N-` are that case's test-step screenshots, and files whose names contain `setup-tc-N-` are its setup-step screenshots. Use these exact filenames in the indented `Screenshot:` lines — place each on the line immediately after the step it documents — and do not reconstruct names from memory.
+
+```
+=== TEST RUN RESULTS ===
+
+SUMMARY: N test cases | N passed | N passed (adaptive) | N failed
+
+--- TEST CASE N: <name> ---
+[emit test case block using the Test case block format defined in Step 3]
+--- END TEST CASE N ---
+
+[one block per test case, in order]
+
+=== TEST RUN COMPLETE: N total, N passed, N passed (adaptive), N failed ===
+```
diff --git a/plugins/bitwarden-playwright-testing/skills/exploring-application-context/SKILL.md b/plugins/bitwarden-playwright-testing/skills/exploring-application-context/SKILL.md
new file mode 100644
index 0000000..b967816
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/exploring-application-context/SKILL.md
@@ -0,0 +1,146 @@
+---
+name: exploring-application-context
+description: Explore the Bitwarden codebase (clients and server) to build a state-centric Application Context for test planning. Use before building test cases whenever a Jira ticket or plan is provided. Returns a markdown document with two sections — ## States (real-user-reachable, observable UI conditions with their verification points) and ## Flows (sequences that transition between states) — grounded in real client and server code.
+---
+
+Given the affected repos, feature description, and acceptance criteria, build a state-centric Application Context by exploring the codebase. This is what `build-test-cases` consumes to generate grounded, accurate test cases.
+
+The artifact is a contract: every state the planner can ask the application to be in, the flows that put it there, and the UI projections it can assert. Information that does not serve that contract is out of scope.
+
+Model what a **real user can reach and observe** — not every selector in the blast radius. Scale your effort to the change: model the minimal set of states needed to assert the change and its acceptance criteria, then stop. Minimal does not mean partial — the target states should *span* the change's blast radius (the observable behaviors the diff touches), not only the headline symptom. Cover every observable behavior the change introduces or modifies, and stop there: the diff is the boundary, so this is not a license to model behaviors the change does not touch.
+
+## Gathering procedure
+
+Read `${CLAUDE_SKILL_DIR}/references/known-flows.md` once before gathering states and flows. It holds two pre-grounded catalogs — `## Known States` (reusable setup states) and `## Known Flows` (reusable flows) — that both sections below draw from; copy relevant entries verbatim rather than re-deriving them.
+
+### Gather the blast radius
+
+For each affected repo passed in by the calling agent, run:
+
+```bash
+git -C <repo-path> diff origin/main...HEAD --name-only
+```
+
+Read the change set. For each changed component, controller, command, or template, trace the handlers and templates it references to identify the **trace surface** — non-diff code you need to read to identify states and flows. The change set and trace surface together form the blast radius. The blast radius is working context only — do not emit it.
+
+### Gather `## States`
+
+States come in two tiers:
+
+- **Target state** — a state the change *produces or modifies*, and that a test asserts against. Model these fully (route + verification points), applying the validity gates below.
+- **Setup state** — a state that only *positions* the app for the test (a precondition or a generic authenticated context); never the assertion target. Satisfy a setup state one of two ways:
+  - **Catalog copy:** if the state appears under `## Known States` in the catalog, copy its entry verbatim. Do not re-ground it.
+  - **Route-only:** otherwise, declare it with its `Route` and a single landmark check confirming the page loaded.
+
+A state is a **target** state if and only if it is the post-condition of a *change-driven* flow — one you traced from the diff. Every state referenced as a precondition or post-condition of a *copied catalog flow* is a **setup** state.
+
+#### Validity gates — apply as you mint each state and verification point
+
+Before recording any state or verification point, confirm all three. If one fails, drop it from the artifact — remove the state *and* its producing flow. Recognizing a failure in prose is not enough: never emit a failed-gate state with a disclaimer that it isn't really reachable; delete it.
+
+1. **Actually observable.** Assert only what a user would *see* in this state. An element present in the DOM but hidden — by the `hidden` attribute, `display:none`, a collapsed/accordion container, an unsatisfied `@if`/`*ngIf`, or any framework's equivalent — is not observable. Reason about the state's real rendered condition in whatever framework renders it (Angular client or server-rendered Razor).
+2. **Correct branch / default.** When behavior is conditional, identify which branch is live in the state you are modeling. For an initial or landing state, check the actual default value that drives the condition, and assert only that branch. Never promote a conditional rule ("hidden iff churn-only") into a default-state assertion ("hidden on load").
+3. **Requirement-anchored.** Assert what the change and the acceptance criteria require. Do not invent expectations the code never promises and no criterion asks for.
+
+#### Recording a target state
+
+- **Slug.** Choose a kebab-slug that encodes distinguishing features when near-neighbor states exist; never reuse a user-intent label across distinct states (e.g. `state:subscription-pending-cancellation` vs. `state:subscription-pending-cancellation-with-deferred-price-schedule`).
+- **Route.** The Angular route or full URL the planner navigates to in order to assert this state.
+- **Verification points.** Record the points that identify this state. For each point: Selector value, Selector type, Expectation, and a `Source:` citation (`file:line`) for where the asserted element or message is defined. **The first grounded, observable selector that identifies the state wins.** If observability in this state depends on a gate (a collapsed container, a conditional), note that gate in prose in `Source:`. If the gate is unsatisfied in this state's landing condition, the point is not observable here (gate 1) — choose a different point, or model the state as the condition in which the element *is* observable and have its producing flow drive into that condition.
+- **Choose the assertion basis by what you are observing — text content vs. structure/state.**
+  - **Text content.** When the verification is that some *text* renders correctly — a validation error, toast, banner/callout, a localized or runtime-computed term (e.g. `/ 年`), a relabeled control, any case where "is the right text on screen?" is the question — the verification point **must use `Selector type: text`**, with the text substring as the Selector value. A `text contains "..."` expectation may **not** be grounded on any structural selector (`data-testid`, `tag`, `role`, or `css`). Collision-safety comes from a **distinctive substring**, not a structural selector — assert the longest literal substring that excludes placeholder tokens and cannot match elsewhere on the page (e.g. `Churn-only cohorts cannot have a proactive discount coupon.`, not a short fragment). If no distinctive substring exists — a short localized unit or computed term like `/ 年` has none — keep `Selector type: text` and name its nearest stable container in `Source:` so the read can be scoped there; the container only bounds the search, it never becomes the assertion basis. Only assert text the change affects.
+  - **Structure / state.** When the verification is a non-text property — element count, visible/hidden, enabled/disabled, the presence of a structural element — assert via the **selector + `Expectation`**. This is where a `data-testid`/role selector is the right assertion basis. A hyphenated tag (`bit-select`, `bit-input`, `bit-radio-*`) is a Bitwarden component, not native HTML, and does not render as its namesake — never ground on `<tag>#id` (e.g. `select#locale`); use its `role` (a `bit-select` renders as a combobox) or a stable `data-testid`.
+- **Reachability.** Every state declares `Reachable by playwright:`. Set it to `yes` if a producer flow or mechanism can drive the application into this state using only the playwright-cli skill. Otherwise set it to `no` and add an **`If no — why:`** one-liner and a **`Reach via:`** recipe describing the sanctioned out-of-band action (a `[HUMAN]` step, a database row a sanctioned tool inserts, or a non-playwright skill) that reaches it.
+- **Producers.** Leave `**Produced by:**` lines in place; fill them in after `## Flows` is gathered.
+- **Flag-conditional UI variants fan out into separate states** with distinct slugs, not one state with conditional verification points.
+
+#### Reach via conventions
+
+For states with `Reachable by playwright: no`, the `Reach via:` recipe documents how the team-lead or user can drive the application into the state using tools beyond playwright-cli. Free-form prose with these conventions:
+
+- **Reference flows by slug:** `Run flow:create-paid-org with orgName=…`
+- **Reference skills by name:** `Use the invoke-stripe-api skill to advance the test clock by 14 days.`
+- **Mark human steps explicitly:** `[HUMAN] Attach a Stripe test clock to the subscription.` The bracketed `[HUMAN]` prefix is a structural marker — downstream consumers detect it deterministically.
+- **Mark `[HUMAN]` verification points the same way:** when confirming a state requires a check the tool policy disallows (a database-field inspection, or any verification playwright cannot perform), record it as a verification point prefixed with `[HUMAN]`.
+
+### Gather `## Flows`
+
+1. From the catalog's `## Known Flows` section, copy relevant entries verbatim if their post-condition state matches a state in `## States`, OR their precondition/steps exercise UI affected by the change. (The setup states that their preconditions reference are minted in `## States` via catalog copy or route-only, as described under "Gather `## States`" above.)
+2. **Token resolution:** When copying any flow whose Steps contain `<bitwarden-portal-admin-email>`, read `server/dev/secrets.json` in the server repo and extract the first entry under the `admins` key. Substitute the resolved address for every occurrence. If the file is absent or `admins` is empty, surface this as a self-review error — do not leave the placeholder unresolved.
+3. For change-driven flows not in the catalog: trace the click handler or form submission through the server controller, command, and integration calls. Enumerate atomic steps, inline per-step feedback (a `- Feedback:` sub-item on each step that produces a visible response), post-condition state, and any branch conditions. Every step must be a real user interaction.
+4. After flows are populated, return to `## States` and fill in each state's `**Produced by:**` line with the slug(s) of the flow(s) whose post-condition is that state.
+
+Every flow obeys these rules:
+
+- **Each flow has exactly one terminal state per branch.** Split multi-stage journeys into one flow per state transition.
+- **Producing flows must reveal their post-condition's gated elements.** If a target state has a verification point whose element is hidden by default, the flow's Steps must include the reveal interaction, and that step's `- Feedback:` sub-item must state that the gated element becomes visible.
+- **`When <condition>:` is free-form prose** (flag conditions or runtime conditions). If the planner can't evaluate the condition at plan time, it picks Default.
+
+## Output schema
+
+Produce a single markdown document with exactly two top-level sections, in this order: `## States` then `## Flows`. No other top-level sections.
+
+### `## States`
+
+For each state:
+
+```
+### state:<short-kebab-slug>
+
+**State type:** target | setup
+
+**Produced by:**
+- flow:<slug>
+- <one or more producer flows; if none, the state is reachable out-of-band (see Reach via:), in which case write `none`>
+
+**Reachable by playwright:** yes | no
+**If no — why:** <one line>  (only when "no")
+**Reach via:**  (only when "no")
+- <recipe step, one bullet per step — see Reach via conventions>
+
+**UI projection:**
+- Route: <URL>
+- Verification points:
+  - Selector: <selector value>
+    - Selector type: tag | data-testid | role | text | css  (text-content points must use `text`; structure/state points use a structural type)
+    - Expectation: <visible | hidden | disabled | text contains "..." | count = N>
+    - Source: <file:line where the element/message is defined; note in prose any gate affecting observability in this state>
+```
+
+### `## Flows`
+
+For each flow:
+
+```
+### flow:<short-kebab-slug>
+
+**Use when:** <one-sentence summary>
+**Parameters:** <comma-separated placeholder names, or "none">
+**Precondition state:** state:<slug> | "none"
+**Steps:**
+1. <atomic UI action with selector and value>
+   - Feedback: <visible response — only on steps that produce one>
+2. ...
+**Post-condition state(s):**
+- Default: state:<slug>
+- When <condition>: state:<slug>  (only when post-condition branches)
+```
+
+## Producing the document — work in notes, serialize once
+
+Do all reasoning in working notes as you explore: accumulate states and verification points, applying the validity gates as you mint each one. **Do not write out the full `## States` / `## Flows` document as an intermediate step.** The complete document appears for the first and only time as your final response — it is a serialization of notes you have already validated, not a draft you revise.
+
+### Terminal self-review (one read-only pass over your notes)
+
+Run these checks once, against your notes, just before serializing. They are read-only — do not re-read source files, and do not re-open a state you have already validated.
+
+1. **Slug resolution.** Every `Precondition state:` and `Post-condition state(s):` slug exists as a `### state:<slug>` heading. Every `Produced by:` slug exists as a `### flow:<slug>` heading. The literal `none` is not a slug and is exempt.
+2. **Parameter coverage.** Every parameter declared on a flow appears as a `<placeholder>` in its Steps, and every `<placeholder>` in Steps is declared in Parameters.
+3. **Target-state completeness.** Every target state has at least one observable verification point.
+4. **Text-content selector basis.** Every verification point whose `Expectation` is `text contains "..."` has `Selector type: text` — never a structural selector (`data-testid`, `tag`, `role`, or `css`).
+
+On any failure, surface the inconsistency in your return — do not self-fix by re-opening exploration.
+
+### Done condition
+
+You are done when every target state has at least one observable verification point and every referenced slug resolves. When that holds, serialize the document once and stop.
diff --git a/plugins/bitwarden-playwright-testing/skills/exploring-application-context/references/known-flows.md b/plugins/bitwarden-playwright-testing/skills/exploring-application-context/references/known-flows.md
new file mode 100644
index 0000000..4b93015
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/exploring-application-context/references/known-flows.md
@@ -0,0 +1,319 @@
+# Known Bitwarden States and Flows
+
+Curated reference of validated, reusable test states and UI flows for the Bitwarden web application. Both catalogs are consumed by `exploring-application-context` (entries are copied verbatim) and drive `build-test-cases`. State slugs are referenced across both catalogs; consistency is enforced by the `exploring-application-context` skill's self-review checks.
+
+---
+
+## Known States
+
+Reusable, pre-grounded setup states, written in the exact `## States` schema the `exploring-application-context` skill emits — so the skill copies an entry **verbatim** into its output (no transform, no re-grounding). Each route and verification point was derived from real source at authoring time (cited in `Source:`). When a cited element moves in the codebase, update the entry here once.
+
+---
+
+### state:authenticated-free-user
+
+**State type:** setup
+
+**Produced by:**
+- flow:create-new-user-and-login
+
+**Reachable by playwright:** yes
+
+**UI projection:**
+- Route: https://localhost:8080/#/vault
+- Verification points:
+  - Selector: heading "All vaults"
+    - Selector type: role
+    - Expectation: visible
+    - Source: clients/apps/web/src/app/vault/individual-vault/vault-header/vault-header.component.ts:187 (default title from the `allVaults` i18n key, rendered as the page `<h1>` via clients/libs/components/src/header/header.component.html:7)
+
+### state:authenticated-premium-user
+
+**State type:** setup
+
+**Produced by:**
+- flow:purchase-premium-subscription
+
+**Reachable by playwright:** yes
+
+**UI projection:**
+- Route: https://localhost:8080/#/settings/subscription/user-subscription
+- Verification points:
+  - Selector: heading "You have Premium"
+    - Selector type: role
+    - Expectation: visible
+    - Source: clients/apps/web/src/app/billing/individual/subscription/cloud-hosted-account-subscription.component.html:16 (the `youHavePremium` i18n key rendered as the page `<h1>`)
+
+### state:authenticated-with-paid-org
+
+**State type:** setup
+
+**Produced by:**
+- flow:create-paid-org
+- flow:complete-trial-signup-existing-user
+
+**Reachable by playwright:** yes
+
+**UI projection:**
+- Route: https://localhost:8080/#/organizations/:organizationId/vault  (org-scoped dynamic URL; `/organizations/:organizationId` redirects to the `vault` child — clients/apps/web/src/app/admin-console/organizations/organization-routing.module.ts:79-81)
+- Verification points:
+  - Selector: link "Admin Console"
+    - Selector type: role
+    - Expectation: visible
+    - Source: clients/apps/web/src/app/admin-console/organizations/layouts/organization-layout.component.html:3 (org side-nav logo `<bit-nav-logo [label]="'adminConsole' | i18n">` — an org-name-independent `aria-label` rendered via clients/libs/components/src/navigation/nav-logo.component.html:11)
+
+### state:admin-portal-authenticated
+
+**State type:** setup
+
+**Produced by:**
+- flow:authenticate-admin-portal
+
+**Reachable by playwright:** yes
+
+**UI projection:**
+- Route: http://localhost:62911
+- Verification points:
+  - Selector: heading "Dashboard"
+    - Selector type: role
+    - Expectation: visible
+    - Source: server/src/Admin/Views/Home/Index.cshtml:55 (static `<h1>Dashboard</h1>` on the authenticated Admin home, served by server/src/Admin/Controllers/HomeController.cs:30)
+
+### state:trialing-org-with-payment
+
+**State type:** setup
+
+**Produced by:**
+- flow:complete-trial-signup-with-payment
+
+**Reachable by playwright:** yes
+
+**UI projection:**
+- Route: https://localhost:8080/#/organizations/:organizationId/vault  (the producer flow ends on the `/#/trial-initiation` "Confirmation Details" step — clients/apps/web/src/app/billing/trial-initiation/complete-trial-initiation/complete-trial-initiation.component.html:55 — whose "Get started" button routes to the org vault — same file:67-70)
+- Verification points:
+  - Selector: link "Admin Console"
+    - Selector type: role
+    - Expectation: visible
+    - Source: clients/apps/web/src/app/admin-console/organizations/layouts/organization-layout.component.html:3 (org side-nav logo, org-name-independent `aria-label` rendered via clients/libs/components/src/navigation/nav-logo.component.html:11)
+
+### state:trialing-org-without-payment
+
+**State type:** setup
+
+**Produced by:**
+- flow:complete-trial-signup-without-payment
+
+**Reachable by playwright:** yes
+
+**UI projection:**
+- Route: https://localhost:8080/#/organizations/:organizationId/vault  (same landing as the with-payment variant — only the billing step is skipped; the "Confirmation Details" step's "Get started" button routes to the org vault — clients/apps/web/src/app/billing/trial-initiation/complete-trial-initiation/complete-trial-initiation.component.html:67-70)
+- Verification points:
+  - Selector: link "Admin Console"
+    - Selector type: role
+    - Expectation: visible
+    - Source: clients/apps/web/src/app/admin-console/organizations/layouts/organization-layout.component.html:3 (org side-nav logo, org-name-independent `aria-label` rendered via clients/libs/components/src/navigation/nav-logo.component.html:11)
+
+### state:trial-verification-email-received
+
+**State type:** setup
+
+**Produced by:**
+- flow:trigger-trial-verification-email
+
+**Reachable by playwright:** no
+**If no — why:** non-UI intermediate state — verified by reading the trial-initiation email from Mailcatcher, not by a rendered page. The check is automated (a script), not a human step.
+**Reach via:**
+- Run flow:trigger-trial-verification-email (its external-trigger curl sends the verification email).
+- Run `${CLAUDE_PLUGIN_ROOT}/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh --recipient <email> --pattern "Verify"`; a trial-initiation URL printed on stdout confirms the state (exit 1 / `NO_MATCH` means the email has not arrived yet).
+
+**UI projection:**
+- Route: n/a
+- Verification points:
+  - Selector: trial-initiation URL on stdout from `read-mailcatcher.sh --recipient <email> --pattern "Verify"`
+    - Selector type: text
+    - Expectation: stdout contains a `https://localhost:8080/#/trial-initiation?...` URL
+    - Source: ${CLAUDE_PLUGIN_ROOT}/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh
+
+---
+
+## Known Flows
+
+Each flow can be used by `build-test-cases` either as a precondition-producing setup flow (referenced by name) or as the action sequence a test exercises (composed inline with assertions).
+
+Entry schema:
+
+- **Use when:** high-level summary of the situations this flow fits
+- **Parameters:** comma-separated placeholder names (e.g., `email`, `password`, `orgName`), or "none"
+- **Precondition state:** `state:<slug>` that must hold before running this flow — or "none"
+- **Steps:** numbered atomic UI actions with selectors and values; each step that produces a visible response carries an inline `- Feedback:` sub-item describing it (toast, redirect, modal, element enters/leaves the DOM)
+- **Post-condition state(s):**
+  - `Default: state:<slug>` — the terminal state the flow produces by default
+  - `When <condition>: state:<slug>` — branch states when the post-condition diverges (e.g., feature-flag-gated behavior)
+
+### flow:create-new-user-and-login
+
+- **Use when:** Any test that requires a fresh authenticated user account with no prior subscription or organization state.
+- **Parameters:** `email`, `password`
+- **Precondition state:** none
+- **Steps:**
+  1. Navigate to `https://localhost:8080/#/signup`
+  2. Fill the Email field with `<email>`
+  3. (Optional) Fill the Name field
+  4. Click Continue
+     - Feedback: "Check your email" confirmation state appears
+  5. Run `read-mailcatcher.sh --recipient <email> --pattern "Verify"` to fetch the verification email; stdout is the magic-link URL
+  6. Navigate to the magic-link URL (it targets `https://localhost:8080/#/finish-signup?...`)
+     - Feedback: finish-signup form appears
+  7. Fill the Master Password field with `<password>` (must be ≥12 characters)
+  8. Fill the Confirm Master Password field with `<password>`
+  9. Click Create Account
+     - Feedback: redirect to the vault
+- **Post-condition state(s):**
+  - Default: state:authenticated-free-user
+
+---
+
+### flow:purchase-premium-subscription
+
+- **Use when:** Any test that requires the user to already hold an active Premium subscription — subscription management page, premium-feature access, discount badge display (any eligible Stripe coupon imported to Admin portal applies automatically at checkout; see `build-test-cases/references/billing-test-data.md`), etc.
+- **Parameters:** none (uses defaults documented in `build-test-cases/references/billing-test-data.md`)
+- **Precondition state:** state:authenticated-free-user
+- **Steps:**
+  1. Navigate to `https://localhost:8080/#/settings/subscription/premium`
+     - Feedback: two pricing cards (Premium and Families) visible
+  2. Click the "Upgrade to Premium" button on the Premium pricing card
+  3. In the Payment Method section, fill the Stripe card number iframe (`frameLocator('[title="Secure card number input frame"]')`): `4242424242424242`
+  4. Fill the expiry iframe (`frameLocator('[title="Secure expiration date input frame"]')`): `12/29`
+  5. Fill the CVC iframe (`frameLocator('[title="Secure CVC input frame"]')`): `123`
+  6. In the Billing Address section, set Country to `United States` and fill the Postal Code field with `12345`
+  7. Click the "Upgrade" button
+     - Feedback: dialog closes; redirect to `https://localhost:8080/#/settings/subscription/user-subscription`; the subscription management view is visible
+- **Post-condition state(s):**
+  - Default: state:authenticated-premium-user
+
+---
+
+### flow:create-paid-org
+
+- **Use when:** Testing features that require a paid organization (Teams, Enterprise, Families, etc.) — including discount badge display on a Families organization (any eligible Stripe coupon imported to Admin portal applies automatically at checkout; see `build-test-cases/references/billing-test-data.md` for the discount mechanism).
+- **Parameters:** `orgName`, `billingEmail`, `planTier`
+- **Precondition state:** state:authenticated-free-user
+- **Steps:**
+  1. Navigate to `https://localhost:8080/#/create-organization`
+  2. Select `<planTier>` (lowest plan tier that supports the features being tested)
+  3. Fill in Organization Name with `<orgName>` and Billing Email with `<billingEmail>`
+  4. In the Payment Information section, select Credit Card
+  5. Fill the card number iframe (`frameLocator('[title="Secure card number input frame"]')`): `4242424242424242`
+  6. Fill the expiry iframe (`frameLocator('[title="Secure expiration date input frame"]')`): `12/29`
+  7. Fill the CVC iframe (`frameLocator('[title="Secure CVC input frame"]')`): `123`
+  8. Set Country to `United States` and fill the Postal Code field with `12345`
+  9. Submit the form
+     - Feedback: success redirect to the new org's vault or settings page
+- **Post-condition state(s):**
+  - Default: state:authenticated-with-paid-org
+
+---
+
+### flow:trigger-trial-verification-email
+
+- **Use when:** Setting up the first stage of any trial-initiation flow (with or without payment, new or existing user) — produces the verification email and retrieves the trial-initiation URL.
+- **Parameters:** `email`, `productTier`, `products`, `trialLength`, `paymentOptional`
+- **Precondition state:** none
+- **Steps:**
+  1. **EXTERNAL TRIGGER** — simulate the marketing site call with curl:
+     ```bash
+     curl -s -X POST http://localhost:33656/accounts/trial/send-verification-email \
+       -H "Content-Type: application/json" \
+       -d '{
+         "email": "<email>",
+         "name": "Test User",
+         "receiveMarketingEmails": false,
+         "productTier": <productTier>,
+         "products": <products>,
+         "trialLength": <trialLength>,
+         "paymentOptional": <paymentOptional>
+       }'
+     ```
+     Reference values — `productTier`: `0` = Free, `1` = Families, `2` = Teams, `3` = Enterprise (per the server's `ProductTierType` enum). `products`: a JSON array of product values, `0` = PasswordManager, `1` = SecretsManager (per the server's `ProductType` enum), e.g. `[0]`. `paymentOptional`: `true` skips the payment step in the downstream completion flow; `false` requires payment.
+  2. Run `read-mailcatcher.sh --recipient <email> --pattern "Verify"` to read the verification email; stdout is the trial-initiation URL — capture it for the next flow.
+     - Feedback: trial-initiation URL is available on stdout
+- **Post-condition state(s):**
+  - Default: state:trial-verification-email-received
+
+---
+
+### flow:complete-trial-signup-with-payment
+
+- **Use when:** Completing a trial signup that requires a payment method (the marketing-site call set `paymentOptional=false`).
+- **Parameters:** `password`, `orgName`, `billingEmail`, `trialInitiationUrl` (the URL captured from `flow:trigger-trial-verification-email`)
+- **Precondition state:** state:trial-verification-email-received
+- **Steps:**
+  1. Navigate to `<trialInitiationUrl>` in the browser
+     - Feedback: "Email verified" toast appears
+  2. Step 1 — enter organization name (`<orgName>`) and billing email (`<billingEmail>`); click Next
+  3. Step 2 — enter payment method (card iframes and billing address as in `flow:purchase-premium-subscription` steps 3–6); click Next
+  4. Step 3 — set a master password to `<password>` (must be ≥12 characters); click Complete
+  5. Confirm on the confirmation page
+     - Feedback: redirect to the new trial organization
+- **Post-condition state(s):**
+  - Default: state:trialing-org-with-payment
+
+---
+
+### flow:complete-trial-signup-without-payment
+
+- **Use when:** Completing a trial signup that does not require a payment method (the marketing-site call set `paymentOptional=true`).
+- **Parameters:** `password`, `orgName`, `billingEmail`, `trialInitiationUrl` (the URL captured from `flow:trigger-trial-verification-email` invoked with `paymentOptional=true`)
+- **Precondition state:** state:trial-verification-email-received
+- **Steps:**
+  1. Navigate to `<trialInitiationUrl>` in the browser
+     - Feedback: "Email verified" toast appears
+  2. Step 1 — enter organization name (`<orgName>`) and billing email (`<billingEmail>`); click Next
+  3. Step 2 — set a master password to `<password>` (must be ≥12 characters); click Complete (the payment step is skipped because the trigger was called with `paymentOptional=true`)
+  4. Confirm on the confirmation page
+     - Feedback: redirect to the new trial organization
+- **Post-condition state(s):**
+  - Default: state:trialing-org-without-payment
+
+---
+
+### flow:complete-trial-signup-existing-user
+
+- **Use when:** Completing a trial signup when the verifying email is already registered with a Bitwarden account; the verification link routes to `/create-organization` instead of `/trial-initiation`.
+- **Parameters:** `orgName`, `billingEmail`, `planTier`, `trialInitiationUrl` (the URL captured from `flow:trigger-trial-verification-email`)
+- **Precondition state:** state:trial-verification-email-received
+- **Steps:**
+  1. Navigate to `<trialInitiationUrl>` in the browser; the app routes to `https://localhost:8080/#/create-organization`
+     - Feedback: routes to `/create-organization`
+  2. Select `<planTier>`
+  3. Fill in Organization Name with `<orgName>` and Billing Email with `<billingEmail>`
+  4. In the Payment Information section, select Credit Card
+  5. Fill the card number iframe (`frameLocator('[title="Secure card number input frame"]')`): `4242424242424242`
+  6. Fill the expiry iframe (`frameLocator('[title="Secure expiration date input frame"]')`): `12/29`
+  7. Fill the CVC iframe (`frameLocator('[title="Secure CVC input frame"]')`): `123`
+  8. Set Country to `United States` and fill the Postal Code field with `12345`
+  9. Submit the form
+     - Feedback: success redirect to the new org's vault or settings page
+- **Post-condition state(s):**
+  - Default: state:authenticated-with-paid-org
+
+**Note:** This flow assumes the user from whose mailbox the verification email was retrieved is already logged in. The trial-existing-user path requires the existing session to persist; the verification URL only works for the logged-in user matching the email.
+
+---
+
+### flow:authenticate-admin-portal
+
+- **Use when:** Any test that requires administrative setup (creating discounts, managing users, verifying subscription state).
+- **Parameters:** `bitwarden-portal-admin-email`
+- **Precondition state:** none
+- **Steps:**
+  1. Navigate to `http://localhost:62911`
+     - Feedback: redirect to the Admin portal login page
+  2. Enter `<bitwarden-portal-admin-email>` in the login field
+  3. Submit the form
+     - Feedback: form clears; magic-link email sent
+  4. Run `read-mailcatcher.sh --recipient <bitwarden-portal-admin-email> --pattern "Continue Logging In"` to read the magic link (subject contains "Admin" or "Continue Logging In"); stdout is the URL
+  5. Navigate directly to the extracted magic-link URL
+     - Feedback: Admin portal home loads, authenticated
+- **Post-condition state(s):**
+  - Default: state:admin-portal-authenticated
diff --git a/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/SKILL.md b/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/SKILL.md
new file mode 100644
index 0000000..51f084d
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/SKILL.md
@@ -0,0 +1,157 @@
+---
+name: reading-mailcatcher-api
+description: Use this skill whenever you need to read an email from the local Bitwarden Mailcatcher inbox — account verification, magic link login, trial activation, OTP codes, password resets, or any other email-driven flow. Uses cURL against the Mailcatcher REST API at http://localhost:1080 to find a message by recipient or subject and extract URLs or tokens from its body. Prefer this over the Mailcatcher browser UI in automated contexts (Playwright's browser CORS restrictions block direct fetch access). Invoke whenever a workflow needs to read, click, or extract content from a message Bitwarden just sent — including account creation, login flows, organization invites, trial activations, and password resets.
+argument-hint: --recipient <email> [--pattern <subject-keyword>] [--link-filter <regex>]
+allowed-tools: [Bash, Read]
+---
+
+## Quick reference — use the script
+
+For all programmatic uses (test runs, ad-hoc fetches, debugging), call the co-located script directly:
+
+```
+bash ${CLAUDE_SKILL_DIR}/scripts/read-mailcatcher.sh --recipient <email> [--pattern <subject-keyword>] [--link-filter <regex>]
+```
+
+- **stdout** (on success): the extracted URL, ready to navigate to or paste into a form field
+- **exit 1 + stderr** (on failure): `NO_MATCH: <diagnostic>` — either no message matched after one retry, or the matched message contained no URL passing the link filter
+
+The script already retries once after a 3-second sleep on the first miss; callers don't need their own retry loop. The procedural reference below documents the underlying Mailcatcher API the script wraps — read it when modifying the script, debugging unexpected output, or doing a one-off curl by hand.
+
+## User invocation
+
+This skill is user-invocable. From any Claude Code session you can trigger it directly with the arguments declared in the `argument-hint` frontmatter — Claude will run the script and return the extracted URL (or the `NO_MATCH` diagnostic). Useful for debugging email flows, exploring Mailcatcher contents, or sanity-checking the script outside the test pipeline.
+
+Example:
+
+```
+--recipient testuser-s1@example.com --pattern "Verify"
+```
+
+## When to Use
+
+Invoke this skill whenever a workflow needs to:
+- Click a verification link sent to a new account's email
+- Log into the Admin Portal via magic link
+- Activate a trial or invite via a link in a welcome/trial email
+- Extract a one-time code or token from any email body
+
+## Prerequisites
+
+Mailcatcher must be running (Docker Compose service). Verify with:
+
+```bash
+curl -s http://localhost:1080/messages > /dev/null && echo "OK" || echo "Mailcatcher not running"
+```
+
+## Step-by-Step Workflow
+
+### Step 1 — List all messages
+
+```bash
+curl -s http://localhost:1080/messages
+```
+
+Returns a JSON array of message objects:
+
+```json
+[
+  {
+    "id": 42,
+    "sender": "<noreply@bitwarden.com>",
+    "recipients": ["<user@example.com>"],
+    "subject": "Verify Your Email",
+    "created_at": "2026-04-21T10:00:00Z",
+    "size": "4200",
+    "formats": ["html", "plain"]
+  }
+]
+```
+
+### Step 2 — Find the target message
+
+Filter by **recipient email** and/or **subject keyword** and select the **highest ID** (most recent):
+
+```bash
+curl -s http://localhost:1080/messages | python3 -c "
+import sys, json
+
+msgs = json.load(sys.stdin)
+
+target_email = 'user@example.com'
+subject_keyword = 'Verify'
+matches = [m for m in msgs if
+    any(target_email in r for r in m['recipients']) and
+    subject_keyword.lower() in m['subject'].lower()
+]
+if not matches:
+    print('NO_MATCH')
+    sys.exit()
+
+best = max(matches, key=lambda m: m['id'])
+print(best['id'])
+"
+```
+
+**Handle both outcomes before proceeding:**
+- `NO_MATCH` — no matching email yet; wait 3–5 seconds and retry (up to ~30 s total before giving up)
+- A numeric ID — proceed to Step 3
+
+**When filtering:**
+- Match on recipient email when the test account address is known (preferred)
+- Match on subject keyword when recipient is generic/unknown
+- Always take `max(id)` — higher ID = more recent message
+
+### Step 3 — Fetch the message body
+
+For link/token extraction, plain text is usually sufficient and easier to parse:
+
+```bash
+MSG_ID=42
+curl -s http://localhost:1080/messages/${MSG_ID}.plain
+```
+
+Use `.html` only when the plain text body is empty or the link is only in the HTML part:
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.html
+```
+
+### Step 4 — Extract the link or token
+
+**Extract any URL matching a keyword pattern:**
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.plain | \
+  grep -oE 'https?://[^ >)"]+' | grep -iE 'verify|confirm|signup|token|trial|login' | head -1
+```
+
+**Extract an admin magic link:**
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.plain | \
+  grep -oE 'http://localhost:62911/login/confirm[^ >)"]+' | head -1
+```
+
+**Extract a web vault verification/signup link:**
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.plain | \
+  grep -oE 'https://localhost:8080/#/[^ >)"]+' | head -1
+```
+
+## Common Email Types and Patterns
+
+See `${CLAUDE_SKILL_DIR}/references/email-patterns.md` for subject lines, link formats, and extraction commands for all common Bitwarden email types.
+
+## Important Notes
+
+- **Tokens expire** — extract and use links immediately; do not cache them for later steps
+- **No auth required** — Mailcatcher runs with no credentials on localhost:1080
+- **High-volume sessions** — when many test accounts are created, always filter by recipient email, not just subject, to avoid getting the wrong message
+- **CORS blocker** — never attempt `fetch('http://localhost:1080/...')` from Playwright's browser context; always use curl from the agent shell
+- **Delete messages** — if isolation is needed, `curl -X DELETE http://localhost:1080/messages` clears all messages. **ALWAYS ask the user before running this command** — it is irreversible and will destroy evidence from earlier test steps.
+
+## Result
+
+See the **Quick reference** at the top of this file for the script's exit-and-stdout contract — that is the authoritative return shape.
diff --git a/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/references/email-patterns.md b/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/references/email-patterns.md
new file mode 100644
index 0000000..c6cc9f7
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/references/email-patterns.md
@@ -0,0 +1,95 @@
+# Bitwarden Email Patterns for Mailcatcher
+
+## Account Verification (New Registration)
+
+**Subject:** `Verify Your Email`
+**Recipient:** The new account email address
+**Link format:** `https://localhost:8080/#/finish-signup?token=BwRegistrationEmailVerificationToken&email=<encoded>`
+
+**Extraction:**
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.plain | \
+  grep -oE 'https://localhost:8080/#/finish-signup[^ >)"]+' | head -1
+```
+
+---
+
+## Admin Portal Magic Link Login
+
+**Subject:** `[Admin] Continue Logging In` or `Continue Logging In`
+**Recipient:** Admin email (find in `server/dev/secrets.json`, key `"admins"`)
+**Link format:** `http://localhost:62911/login/confirm?email=<admin>&token=<token>&returnUrl=/`
+
+**Extraction:**
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.plain | \
+  grep -oE 'http://localhost:62911/login/confirm[^ >)"]+' | head -1
+```
+
+---
+
+## Trial Activation Link
+
+**Subject:** Varies — check for `trial`, `start`, `activate`
+**Recipient:** Trial initiator email
+**Link format:** `https://localhost:8080/#/...?trialLength=...&token=...`
+
+**Extraction:**
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.plain | \
+  grep -oE 'https?://localhost[^ >)"]+' | grep -iE 'trial|verify|token|register' | head -1
+```
+
+---
+
+## Organization Invite
+
+**Subject:** `Join <OrgName> on Bitwarden`
+**Recipient:** Invited user email
+**Link format:** `https://localhost:8080/#/accept-organization?orgId=...&orgUserId=...&token=...`
+
+**Extraction:**
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.plain | \
+  grep -oE 'https://localhost:8080/#/accept-organization[^ >)"]+' | head -1
+```
+
+---
+
+## Emergency Access Invite
+
+**Subject:** `Emergency Access Request`
+**Recipient:** Grantee email
+**Link format:** `https://localhost:8080/#/accept-emergency?id=...&token=...`
+
+**Extraction:**
+
+```bash
+curl -s http://localhost:1080/messages/${MSG_ID}.plain | \
+  grep -oE 'https://localhost:8080/#/accept-emergency[^ >)"]+' | head -1
+```
+
+---
+
+## Welcome Email (No Action Required)
+
+**Subject:** `Welcome to Bitwarden!`
+**Purpose:** Confirmation only — no link extraction needed
+**Verification:** Confirm receipt to validate registration completed
+
+---
+
+## API Quick Reference
+
+| Operation | Command |
+|-----------|---------|
+| List all messages | `curl -s http://localhost:1080/messages` |
+| Get plain text body | `curl -s http://localhost:1080/messages/{id}.plain` |
+| Get HTML body | `curl -s http://localhost:1080/messages/{id}.html` |
+| Get JSON metadata | `curl -s http://localhost:1080/messages/{id}.json` |
+| Delete specific message | `curl -X DELETE http://localhost:1080/messages/{id}` |
+| Clear all messages | `curl -X DELETE http://localhost:1080/messages` — **ALWAYS ask user first; irreversible** |
diff --git a/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh b/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh
new file mode 100755
index 0000000..aafd754
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/reading-mailcatcher-api/scripts/read-mailcatcher.sh
@@ -0,0 +1,176 @@
+#!/usr/bin/env bash
+# Fetch a Bitwarden Mailcatcher message matching a recipient + subject, and print
+# the first matching URL from its body on stdout. Exit 0 on success.
+# On NO_MATCH, exits 1 with a single-line diagnostic on stderr.
+#
+# Usage:
+#   read-mailcatcher.sh --recipient <email> [--pattern <subject-keyword>] [--link-filter <regex>]
+#
+# --pattern is optional. Omit (or pass empty) to match any subject and just take the
+# most recent message for the recipient.
+#
+# Defaults:
+#   --link-filter: verify|confirm|signup|token|trial|login|finish-signup
+#
+# Environment:
+#   MAILCATCHER_URL: base URL of the Mailcatcher REST API (default: http://localhost:1080)
+#
+# Exit codes:
+#   0  a URL was printed on stdout
+#   1  NO_MATCH: no matching message, no URL passing the link filter, or the
+#      message list could not be read
+#   2  usage error (unknown argument, missing option value, missing --recipient)
+#
+# Designed to be called via the Bash tool from the test-runner. The skill body in
+# ../SKILL.md documents the underlying Mailcatcher REST API this wraps.
+
+# Only -u: the functions below signal outcomes through non-zero statuses, so -e
+# and pipefail would abort the script before it could report NO_MATCH.
+set -u
+
+MAILCATCHER_URL="${MAILCATCHER_URL:-http://localhost:1080}"
+RECIPIENT=""
+PATTERN=""
+LINK_FILTER="verify|confirm|signup|token|trial|login|finish-signup"
+# ID of the message matched by the most recent attempt (empty if none matched).
+MATCHED_ID=""
+
+# Print this file's header comment (everything between the shebang and the first
+# non-comment line) to stderr. stdout stays reserved for the extracted URL.
+usage() {
+  awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0" >&2
+}
+
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --recipient|--pattern|--link-filter)
+      # Reject a trailing option with no value instead of tripping `set -u`.
+      if [ $# -lt 2 ]; then
+        echo "ERROR: $1 requires a value" >&2
+        exit 2
+      fi
+      case "$1" in
+        --recipient)   RECIPIENT="$2" ;;
+        --pattern)     PATTERN="$2" ;;
+        --link-filter) LINK_FILTER="$2" ;;
+      esac
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 2
+      ;;
+  esac
+done
+
+if [ -z "$RECIPIENT" ]; then
+  echo "ERROR: --recipient is required" >&2
+  exit 2
+fi
+
+# Print the highest (most recent) ID among messages whose recipient list contains
+# RECIPIENT and whose subject contains PATTERN (both case-insensitive).
+# Returns: 0 with the ID on stdout, 1 when nothing matches, 2 when the message
+# list is missing or is not valid JSON.
+find_message_id() {
+  curl -fsS "$MAILCATCHER_URL/messages" 2>/dev/null | RECIPIENT="$RECIPIENT" PATTERN="$PATTERN" python3 -c "
+import sys, json, os
+
+try:
+    msgs = json.load(sys.stdin)
+except Exception:
+    sys.exit(2)
+
+recipient = os.environ['RECIPIENT'].lower()
+pattern = os.environ['PATTERN'].lower()
+
+matches = [
+    m for m in msgs
+    if any(recipient in r.lower() for r in m.get('recipients', []))
+    and (not pattern or pattern in m.get('subject', '').lower())
+]
+if not matches:
+    sys.exit(1)
+print(max(matches, key=lambda m: m['id'])['id'])
+"
+}
+
+# Read text on stdin and print every http(s) URL that passes LINK_FILTER, one per line.
+filter_urls() {
+  grep -oE 'https?://[^ <>\")]+' | grep -iE -- "$LINK_FILTER"
+}
+
+# Print the first URL passing LINK_FILTER from message $1; print nothing if none.
+# The plain-text part is tried first. The HTML part is used when the plain part
+# is empty or holds no matching URL, with &amp; decoded so query strings survive.
+extract_url() {
+  local id="$1"
+  local body url
+  body="$(curl -fsS "$MAILCATCHER_URL/messages/${id}.plain" 2>/dev/null || true)"
+  url="$(printf '%s' "$body" | filter_urls | head -1)"
+  if [ -z "$url" ]; then
+    body="$(curl -fsS "$MAILCATCHER_URL/messages/${id}.html" 2>/dev/null || true)"
+    url="$(printf '%s' "$body" | sed 's/&amp;/\&/g' | filter_urls | head -1)"
+  fi
+  if [ -n "$url" ]; then
+    printf '%s\n' "$url"
+  fi
+}
+
+# Run one lookup and print the URL on success. Prints no diagnostics, so a
+# retried run still ends with exactly one stderr line.
+# Returns: 0 URL printed, 1 no matching message, 2 message matched but no URL
+# passed the filter, 3 message list unreadable.
+attempt() {
+  local status url
+  MATCHED_ID="$(find_message_id)"
+  status=$?
+  case "$status" in
+    0) ;;
+    1) MATCHED_ID=""; return 1 ;;
+    *) MATCHED_ID=""; return 3 ;;
+  esac
+  if [ -z "$MATCHED_ID" ]; then
+    return 1
+  fi
+  url="$(extract_url "$MATCHED_ID")"
+  if [ -z "$url" ]; then
+    return 2
+  fi
+  printf '%s\n' "$url"
+  return 0
+}
+
+attempt
+rc=$?
+if [ "$rc" -ne 0 ]; then
+  # One retry — Mailcatcher may not have received the message yet.
+  sleep 3
+  attempt
+  rc=$?
+fi
+
+# Report only the final attempt's outcome so stderr carries a single diagnostic.
+case "$rc" in
+  0)
+    exit 0
+    ;;
+  2)
+    echo "NO_MATCH: message $MATCHED_ID matched but contained no URL filtered by '$LINK_FILTER'" >&2
+    ;;
+  3)
+    echo "NO_MATCH: could not read the message list from $MAILCATCHER_URL (is Mailcatcher running and python3 installed?)" >&2
+    ;;
+  *)
+    if [ -n "$PATTERN" ]; then
+      echo "NO_MATCH: no email for recipient '$RECIPIENT' with subject containing '$PATTERN'" >&2
+    else
+      echo "NO_MATCH: no email for recipient '$RECIPIENT'" >&2
+    fi
+    ;;
+esac
+exit 1
diff --git a/plugins/bitwarden-playwright-testing/skills/test-web-changes/SKILL.md b/plugins/bitwarden-playwright-testing/skills/test-web-changes/SKILL.md
new file mode 100644
index 0000000..1e32cbc
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/test-web-changes/SKILL.md
@@ -0,0 +1,272 @@
+---
+name: test-web-changes
+description: End-to-end Playwright testing pipeline for local Bitwarden web changes. Uses an agent team to generate test cases from a Jira ticket or feature implementation plan, start required services, run Playwright tests, and produce an HTML report — all in a single command. Use when you want to plan and run UI tests for local web changes without manual steps. Accepts a Jira ticket ID, a feature implementation plan file path, or a feature description. Add --confirm to pause for test case review before starting test execution.
+argument-hint: "<jira-ticket-id | feature-plan-path | feature-description> [--confirm]"
+allowed-tools: [Read, Write, Bash]
+---
+
+You are the team lead for the Bitwarden web test pipeline. Your role is orchestration plus artifact persistence: you dispatch agents, wait for them to complete, and write their responses to artifact files. You do no research, exploration, or test execution yourself.
+
+## Step 0 — Parse input
+
+Extract from the arguments:
+
+- **`--confirm` flag**: present or absent. If present, strip it from the remaining input.
+- **Input value**: the remaining argument text after stripping the flag above.
+- **Input type**: detect from the input value:
+  - Jira ticket: matches `[A-Z]+-\d+` (e.g., `PM-12345`)
+  - Plan file: ends with `.md` and looks like a file path
+  - Free-form description: anything else
+
+**Generate timestamp** (`YYYYMMDD-HHmm`) once now. Reuse it for all artifact filenames and `<timestamp>` placeholders in this run.
+
+**Derive slug** from the input value: lowercase, with spaces, underscores, path separators, and any other character outside `a-z`, `0-9`, and `-` replaced with hyphens, truncated to 40 chars. Fallback if the result is empty: `pwt-<timestamp>`.
+
+**Create output directory** and derive the `<artifacts-output-dir>` token: resolve the absolute path `<current working directory>/.playwright-testing-artifacts/<slug>/`, create that directory, and use it for `<artifacts-output-dir>` in every artifact path in the steps below.
+
+---
+
+## Step 1 — Create team and add teammates
+
+Create team named `pwt-<slug>`. Add all seven teammates:
+
+| Teammate | Agent type |
+|---|---|
+| `context-gatherer` | `bitwarden-playwright-testing:context-gatherer` |
+| `code-explorer` | `bitwarden-playwright-testing:code-explorer` |
+| `service-mapper` | `bitwarden-playwright-testing:service-mapper` |
+| `test-planner` | `bitwarden-playwright-testing:test-planner` |
+| `service-manager` | `bitwarden-playwright-testing:service-manager` |
+| `test-runner` | `bitwarden-playwright-testing:test-runner` |
+| `report-compiler` | `bitwarden-playwright-testing:report-compiler` |
+
+All teammates wait for explicit dispatch. They must not self-activate.
+
+---
+
+## Task 1: Gather context
+
+Dispatch `context-gatherer` with:
+
+```
+Input type: <jira-ticket | plan-file | description>
+Input value: <value>
+```
+
+Wait for completion. The agent returns the full context as a markdown response.
+
+**Persist artifact**: Write the agent's response text verbatim to `<artifacts-output-dir>/context-<timestamp>.md` using the `Write` tool.
+
+---
+
+## Task 2: Explore codebase *(blockedBy: Task 1)*
+
+Dispatch `code-explorer` with:
+
+```
+Context artifact path: <artifacts-output-dir>/context-<timestamp>.md
+```
+
+Wait for completion. The agent returns the Application Context as a markdown response.
+
+**Persist artifact**: Write the agent's response text verbatim to `<artifacts-output-dir>/app-context-<timestamp>.md` using the `Write` tool.
+
+---
+
+## Task 3: Determine required services *(blockedBy: Task 2)*
+
+Dispatch `service-mapper` with:
+
+```
+Context artifact path: <artifacts-output-dir>/context-<timestamp>.md
+App-context artifact path: <artifacts-output-dir>/app-context-<timestamp>.md
+```
+
+Wait for completion. The agent returns the services list as a markdown response.
+
+**Persist artifact**: Write the agent's response text verbatim to `<artifacts-output-dir>/services-<timestamp>.md` using the `Write` tool.
+
+---
+
+## Task 4: Build test cases *(blockedBy: Task 2)*
+
+Dispatch `test-planner` with:
+
+```
+Context artifact path: <artifacts-output-dir>/context-<timestamp>.md
+App-context artifact path: <artifacts-output-dir>/app-context-<timestamp>.md
+```
+
+Wait for completion. The agent returns the test cases as a markdown response. The response begins with the `## Test Cases` heading.
+
+**Persist artifact**: Write the agent's response text verbatim to `<artifacts-output-dir>/test-cases-<timestamp>.md` using the `Write` tool.
+
+---
+
+## Task 5: Compose test plan *(blockedBy: Task 3, Task 4)*
+
+This is pure team-lead work — no agent dispatch. Read both planning artifacts and assemble the final test plan.
+
+1. Read `<artifacts-output-dir>/services-<timestamp>.md` — this is the full services list.
+2. Read `<artifacts-output-dir>/test-cases-<timestamp>.md` — this is the full test-cases list.
+3. Write `<artifacts-output-dir>/test-plan-<timestamp>.md` using this exact template:
+
+```markdown
+# Test Plan
+
+**Generated:** <timestamp>
+
+<contents of services-<timestamp>.md, verbatim>
+
+<contents of test-cases-<timestamp>.md, verbatim>
+```
+
+---
+
+## Shut down planning teammates
+
+Shut down `context-gatherer`, `code-explorer`, `service-mapper`, and `test-planner`. Standing teammates (`service-manager`, `test-runner`, `report-compiler`) remain.
+
+---
+
+## Optional review gate *(only if `--confirm` was set)*
+
+Read `<artifacts-output-dir>/test-plan-<timestamp>.md`. Count the test cases and extract their names.
+
+Display:
+
+> "Test plan written to `<artifacts-output-dir>/test-plan-<timestamp>.md`
+>
+> **Test Cases (<N>):**
+> - <test case name 1>
+> - <test case name 2>
+> - ...
+>
+> Proceed with test execution? (yes/no)"
+
+- **No**: shut down remaining teammates, delete team, tell user the test plan path. Stop.
+- **Yes**: continue.
+
+If `--confirm` was not set, print: "Test plan complete — proceeding to test execution." and continue immediately.
+
+---
+
+## Task 6: Verify environment health *(blockedBy: Task 5)*
+
+Dispatch `service-manager` with:
+
+```
+Test plan path: <artifacts-output-dir>/test-plan-<timestamp>.md
+Artifacts output dir: <artifacts-output-dir>
+```
+
+Wait for completion. The agent will return either:
+
+- A one-line success of the form `Environment verified: <N> services healthy, render OK.`
+- Or an error block from the verifying-environment-health skill (preflight failure, health-check timeout, or render failure).
+
+If the response is **not** the success confirmation, paste the response to the user and halt the run. Do not dispatch `test-runner`, do not write any artifact, do not run cleanup. If it is the success confirmation, proceed to Task 7.
+
+No artifact is written for this task.
+
+---
+
+## Task 7: Execute tests *(blockedBy: Task 6)*
+
+Dispatch `test-runner` with:
+
+```
+Test plan path: <artifacts-output-dir>/test-plan-<timestamp>.md
+Artifacts output dir: <artifacts-output-dir>
+```
+
+Wait for the test-runner to return a response. 
+
+### Handling test-runner pause responses
+
+When the `test-runner` response contains `Need user input:`, it is a pause response with two parts:
+
+1. **Partial results chunk**: everything up to and including `=== PARTIAL RUN — PAUSED ===`
+2. **Question**: the `Need user input:` line (always last)
+
+**On each pause:**
+
+1. Extract the partial results chunk and the question.
+2. Write/append the partial results chunk to `<artifacts-output-dir>/checkpoint-<timestamp>.md`:
+   - First pause: create the file and write the chunk.
+   - Subsequent pauses: open the file in append mode and add the chunk with a blank-line separator.
+3. Surface the question to the user and capture the answer.
+4. Re-dispatch `test-runner` with:
+
+```
+Test plan path: <artifacts-output-dir>/test-plan-<timestamp>.md
+Checkpoint path: <artifacts-output-dir>/checkpoint-<timestamp>.md
+Artifacts output dir: <artifacts-output-dir>
+Resume: A prior test-runner agent paused at a [HUMAN] step. The user has now completed that action.
+  Paused at: <verbatim text after "Need user input: ">
+  User's answer: <user's answer>
+```
+
+5. Repeat from step 1 if the new agent also pauses.
+
+### Handling test-runner complete response
+
+When the test-runner returns a response containing `=== TEST RUN COMPLETE` (the full marker includes totals, e.g. `=== TEST RUN COMPLETE: 3 total, 2 passed, 0 passed (adaptive), 1 failed ===`), proceed to persist the artifact.
+
+### Persist artifact 
+
+Write `<artifacts-output-dir>/test-results-<timestamp>.md`. The file is one bare raw output block — no headers or markdown or any added prose or commentary.
+
+**If no test pauses occurred** (no `checkpoint-<timestamp>.md` file exists): write the test-runner's response verbatim using the `Write` tool. It is already a single raw output block.
+
+**If test pauses occurred** (checkpoint file exists): append the final raw output segment from the test-runner's response to `checkpoint-<timestamp>.md` with a blank-line separator, then assemble one merged raw output block:
+
+*Note: the checkpoint file contains multiple raw output segments separated by blank lines. Each segment begins with `=== TEST RUN RESULTS ===` and ends with either `=== PARTIAL RUN — PAUSED ===` or `=== TEST RUN COMPLETE: ... ===`. Read the per-segment `SUMMARY:` lines to compute the totals in step 3, but do not copy any segment headers, per-segment `SUMMARY:` lines, or `=== PARTIAL RUN — PAUSED ===` markers into the merged output.*
+
+1. Read `checkpoint-<timestamp>.md` in full.
+2. Collect every `--- TEST CASE N: <name> --- ... --- END TEST CASE N ---` block across all segments, in order.
+3. Sum the `SUMMARY:` counts across all segments to produce final totals (total, passed, adaptive, failed).
+4. Write `test-results-<timestamp>.md` as exactly one block, verbatim:
+   ```
+   === TEST RUN RESULTS ===
+
+   SUMMARY: <summed total> test cases | <summed passed> passed | <summed adaptive> passed (adaptive) | <summed failed> failed
+
+   <all test case blocks from step 2, in order>
+
+   === TEST RUN COMPLETE: <total> total, <passed> passed, <adaptive> passed (adaptive), <failed> failed ===
+   ```
+
+Capture the final totals from the `=== TEST RUN COMPLETE: ... ===` marker — you will reuse them in the Shutdown summary.
+
+---
+
+## Task 8: Compile report *(blockedBy: Task 7)*
+
+Dispatch `report-compiler` with:
+
+```
+Test plan path: <artifacts-output-dir>/test-plan-<timestamp>.md
+Test results path: <artifacts-output-dir>/test-results-<timestamp>.md
+```
+
+Wait for completion. The agent returns a single fenced ```html``` block containing the full HTML document.
+
+**Persist artifact**: Extract the HTML body (the content between the ```html and ``` fences) and write it verbatim to `<artifacts-output-dir>/report-<timestamp>.html` using the `Write` tool.
+
+---
+
+## Shutdown
+
+Shut down remaining teammates (`service-manager`, `test-runner`, `report-compiler`). Delete team `pwt-<slug>`.
+
+Present final summary:
+
+```
+Test run complete for <input value>
+
+Test plan: <artifacts-output-dir>/test-plan-<timestamp>.md
+Report (HTML): <artifacts-output-dir>/report-<timestamp>.html
+
+Results: <N> total | <N> passed | <N> passed (adaptive) | <N> failed
+```
diff --git a/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/SKILL.md b/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/SKILL.md
new file mode 100644
index 0000000..fcff8c1
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/SKILL.md
@@ -0,0 +1,67 @@
+---
+name: verifying-environment-health
+description: Verify the Bitwarden local dev environment is ready for testing — Docker dev containers via preflight, application services via the health-check script, and Angular bootstrap via render verification. Halts on the first failure. Use after determining required services and before executing tests. Requires the `playwright-cli` plugin for render verification.
+---
+
+Given the list of required services and the primary test URL, confirm the local dev environment is ready to run Playwright tests. The user is responsible for starting all services before this skill runs — this skill never starts, builds, or stops anything.
+
+The procedure is linear and halts on the first failure. Each step has a specific failure message intended to point the user at the missing piece of their environment.
+
+## Inputs
+
+- **Required service names:** a list of names (e.g., `Api`, `Identity`, `Web`) drawn from the test plan's `## Required Services` block. These names are the argv for `scripts/health-check.sh` — see that script for the full list of accepted names.
+- **Primary test URL:** the URL the test run will navigate to first. Either `https://localhost:8080` (web vault) or `http://localhost:62911` (Bitwarden Portal). Drives the render-verify step.
+- **Artifacts output dir:** absolute path to the run's artifacts folder. The render-verify screenshot is saved under `<artifacts-output-dir>/screenshots/`.
+
+## Procedure
+
+### 1. Preflight check (Docker daemon + dev containers)
+
+```bash
+bash ${CLAUDE_SKILL_DIR}/scripts/preflight-check.sh
+```
+
+The script verifies the Docker daemon is reachable and that the expected Bitwarden dev containers are running (mssql, mailcatcher, azurite). It accepts both Compose and Aspire naming patterns.
+
+If the script exits non-zero, **STOP**. Paste its stdout/stderr verbatim to the caller and do not continue. The script already prints a `Resolve:` hint covering both Compose and Aspire workflows.
+
+### 2. Application health check
+
+```bash
+bash ${CLAUDE_SKILL_DIR}/scripts/health-check.sh <ServiceName1> [<ServiceName2> ...]
+```
+
+Pass the required service names verbatim. Accepted names: `Api`, `Identity`, `Billing`, `billing-pricing`, `Web`, `Admin`, `Notifications`, `Events`, `Icons`. Override the 360s default timeout with `HEALTH_CHECK_TIMEOUT=<seconds>`.
+
+If the script exits non-zero, **STOP**. Paste the script's stdout verbatim to the caller and add a one-line hint: `Service <first-not-ready-name> is not responding. Start it and re-run.` (The script's own output already lists every service that did not respond and its last HTTP status.)
+
+### 3. Render verification (required — HTTP 200 is not sufficient)
+
+Generate a `YYYYMMDD-HHmm` timestamp once. Use the `playwright-cli` skill (via the `Skill` tool) to navigate to the primary test URL and take a full-page screenshot, saving it to the run's artifacts folder:
+
+```
+screenshot --filename=<artifacts-output-dir>/screenshots/render-verify-<timestamp>.png --full-page
+```
+
+**Web vault (`https://localhost:8080`)**: inspect for any of:
+- A webpack compilation error overlay (text `Compiled with problems:`).
+- A blank or all-white page (Angular failed to bootstrap).
+- Any other full-page error state that prevents normal UI interaction.
+
+If any of these is present, **STOP**. Report the failure with the screenshot path. The webpack dev server returns HTTP 200 even when Angular compilation failed, so only a visual render check is reliable.
+
+**Bitwarden Portal (`http://localhost:62911`)**: a redirect to the login page is the expected healthy state — the Portal is .NET Razor, not Angular/webpack. Confirm the login page loaded; any 5xx response or blank page is a failure. Do not check for webpack errors.
+
+## Output
+
+On success, return a single line of the form:
+
+```
+Environment verified: <N> services healthy, render OK.
+```
+
+where `<N>` is the count of service names passed to step 2.
+
+On failure at any step, return the offending step's output verbatim (script stdout/stderr or render screenshot path + description), with no further work and no success line.
+
+This skill writes no markdown artifact.
diff --git a/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/health-check.sh b/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/health-check.sh
new file mode 100755
index 0000000..790993b
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/health-check.sh
@@ -0,0 +1,120 @@
+#!/usr/bin/env bash
+# health-check.sh — Poll Bitwarden local dev services until all are ready.
+#
+# Usage:   ./health-check.sh <Service1> [Service2] ...
+# Example: ./health-check.sh Api Identity Web
+#
+# Available service names:
+#   Api, Identity, Billing, billing-pricing, Web, Admin,
+#   Notifications, Events, Icons
+#
+# Override timeout (default 360s):
+#   HEALTH_CHECK_TIMEOUT=60 ./health-check.sh Api
+#
+# A service counts as ready when its URL answers HTTP 200 or 302.
+# All output, including errors, goes to stdout.
+#
+# Exit 0: all services ready.
+# Exit 1: timeout, unknown service name, or invalid HEALTH_CHECK_TIMEOUT.
+
+# Print the probe URL for service name $1, or an empty line if the name is unknown.
+get_url() {
+  case "$1" in
+    Api)             echo "http://localhost:4000/alive" ;;
+    Identity)        echo "http://localhost:33656/alive" ;;
+    Billing)         echo "http://localhost:44519/alive" ;;
+    billing-pricing) echo "http://localhost:5082/alive" ;;
+    Web)             echo "https://localhost:8080" ;;
+    Admin)           echo "http://localhost:62911" ;;
+    Notifications)   echo "http://localhost:61840" ;;
+    Events)          echo "http://localhost:46273" ;;
+    Icons)           echo "http://localhost:50024" ;;
+    *)               echo "" ;;
+  esac
+}
+
+TIMEOUT="${HEALTH_CHECK_TIMEOUT:-360}"
+
+if [ $# -eq 0 ]; then
+  echo "Usage: $0 <Service1> [Service2] ..."
+  echo "Available: Api, Identity, Billing, billing-pricing, Web, Admin (Bitwarden Portal), Notifications, Events, Icons"
+  echo "Override timeout: HEALTH_CHECK_TIMEOUT=60 $0 Api"
+  exit 1
+fi
+
+# A non-numeric timeout would make the numeric comparison in the poll loop fail
+# on every pass, so the loop would never time out. Reject it up front.
+case "$TIMEOUT" in
+  *[!0-9]*)
+    echo "Invalid HEALTH_CHECK_TIMEOUT: '$TIMEOUT' (expected a whole number of seconds)"
+    exit 1
+    ;;
+esac
+
+# Validate all names upfront and build a deduplicated space-separated list
+SERVICES=""
+for svc in "$@"; do
+  url=$(get_url "$svc")
+  if [ -z "$url" ]; then
+    echo "Unknown service: $svc"
+    echo "Available: Api, Identity, Billing, billing-pricing, Web, Admin (Bitwarden Portal), Notifications, Events, Icons"
+    exit 1
+  fi
+  case " $SERVICES " in
+    *" $svc "*) ;;  # already in list, skip
+    *) SERVICES="$SERVICES $svc" ;;
+  esac
+done
+SERVICES="${SERVICES# }"  # trim leading space
+
+TOTAL=$(echo "$SERVICES" | wc -w | tr -d ' ')
+READY=""
+READY_COUNT=0
+START=$SECONDS
+
+echo "Waiting for $TOTAL service(s): $SERVICES (timeout: ${TIMEOUT}s)"
+
+while true; do
+  # Rebuilt every pass: names still pending, and one report line per pending
+  # service carrying the HTTP status seen on this pass.
+  PENDING=""
+  NOT_READY_REPORT=""
+
+  for svc in $SERVICES; do
+    # Skip if already marked ready
+    case " $READY " in
+      *" $svc "*) continue ;;
+    esac
+
+    URL=$(get_url "$svc")
+    # -k skips TLS certificate verification (Web is probed over https on
+    # localhost). curl reports 000 when no HTTP response was received.
+    STATUS=$(curl -k -s -o /dev/null -w "%{http_code}" --max-time 3 "$URL" 2>/dev/null)
+
+    if [ "$STATUS" = "200" ] || [ "$STATUS" = "302" ]; then
+      READY="$READY $svc"
+      READY_COUNT=$((READY_COUNT + 1))
+      echo "  ✅ $svc ready ($(( SECONDS - START ))s elapsed)"
+    else
+      PENDING="$PENDING $svc"
+      NOT_READY_REPORT="${NOT_READY_REPORT}  ❌ $svc — HTTP $STATUS"$'\n'
+    fi
+  done
+
+  # All services ready — exit immediately
+  if [ "$READY_COUNT" -ge "$TOTAL" ]; then
+    break
+  fi
+
+  # Timed out — report the statuses observed on this final pass and exit
+  if [ $((SECONDS - START)) -ge "$TIMEOUT" ]; then
+    echo "⚠️  Timeout after ${TIMEOUT}s. Not ready:"
+    printf '%s' "$NOT_READY_REPORT"
+    exit 1
+  fi
+
+  echo "  ⏳ Still waiting: ${PENDING# } ($(( SECONDS - START ))s elapsed)"
+  sleep 5
+done
+
+echo "✅ All $TOTAL service(s) ready ($(( SECONDS - START ))s total)"
diff --git a/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/preflight-check.sh b/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/preflight-check.sh
new file mode 100755
index 0000000..9e83a90
--- /dev/null
+++ b/plugins/bitwarden-playwright-testing/skills/verifying-environment-health/scripts/preflight-check.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+#
+# preflight-check.sh
+#
+# Verify environmental preconditions for the bitwarden-playwright-testing
+# pipeline. Exits 0 if all preconditions are met; exits 1 with a structured
+# message on stderr naming what is missing and how to resolve it.
+#
+# The Bitwarden dev environment can be started via the legacy Docker Compose
+# workflow (server/dev/docker-compose.yml) or the newer .NET Aspire AppHost
+# workflow (server/AppHost). Each names containers differently, so every
+# required service is accepted under either pattern:
+#   Compose: bitwardenserver-<service>-1    Aspire: <service>-<random-suffix>
+
+set -u
+
+# Print "Preflight check failed:" plus one line per argument to stderr; exit 1.
+fail() { printf '%s\n' "Preflight check failed:" "$@" >&2; exit 1; }
+
+# 1. Docker CLI installed (checked first, with its own message) and daemon reachable
+if ! command -v docker >/dev/null 2>&1; then
+  fail "  - The 'docker' command was not found on PATH." \
+    "    Resolve: install Docker (or fix PATH), then re-run the pipeline."
+fi
+if ! docker info >/dev/null 2>&1; then
+  fail "  - Docker daemon is not reachable." \
+    "    Resolve: start Docker Desktop (or the docker service), then re-run the pipeline."
+fi
+
+# 2. Required Bitwarden dev containers.
+# Each row: <human label>|<compose name regex>|<aspire name regex>
+REQUIRED_SERVICES=(
+  "MSSQL database|-mssql-|^mssql-"
+  "Mailcatcher email|-mail-|^mailcatcher-"
+  "Azurite storage|-storage-|^azurite-"
+)
+
+# A failed listing is reported as such instead of as every container missing.
+if ! RUNNING_NAMES=$(docker ps --format '{{.Names}}'); then
+  fail "  - 'docker ps' failed, so running containers could not be listed." \
+    "    Resolve: fix the docker error printed above, then re-run the pipeline."
+fi
+
+MISSING=""
+for entry in "${REQUIRED_SERVICES[@]}"; do
+  IFS='|' read -r label compose_pat aspire_pat <<< "$entry"
+  if ! grep -qE -- "${compose_pat}|${aspire_pat}" <<< "${RUNNING_NAMES}"; then
+    MISSING+="  - ${label}: no running container matched '${compose_pat}' (Compose) or '${aspire_pat}' (Aspire)."$'\n'
+  fi
+done
+
+if [[ -n "${MISSING}" ]]; then
+  # MISSING already ends in a newline, so the Resolve text is appended to it directly.
+  fail "${MISSING}    Resolve: start the Bitwarden dev environment. Either:" \
+    "      Compose: cd <bitwarden-root>/server/dev && docker compose up -d" \
+    "      Aspire:  cd <bitwarden-root>/server/AppHost && dotnet run"
+fi
+
+echo "Preflight check passed."
+exit 0