diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 6ce73d0..dbfa641 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -92,6 +92,12 @@ "source": "./plugins/bitwarden-design-tools", "version": "0.1.0", "description": "Design toolkit for Bitwarden — non-persona skills for the design lifecycle. Content style guide reference, Figma Dev Mode MCP usage, Bitwarden brand application, design-to-engineering handoff prep, Design System governance, and the Product and Design Jira workflow. Composed by the bitwarden-designer agent and usable standalone." + }, + { + "name": "bitwarden-test-engineer", + "source": "./plugins/bitwarden-test-engineer", + "version": "1.0.0", + "description": "Test engineering toolkit for Bitwarden. Hosts role-specific testing agents — currently a test strategist that recommends what to test, at which layer, and why (risk-weighted, shaped to each repo) and inventories existing coverage. Designed to grow additional roles such as an SDET or a QA engineer." 
} ] } diff --git a/.cspell.json b/.cspell.json index 7f702a6..ce3f3c4 100644 --- a/.cspell.json +++ b/.cspell.json @@ -3,6 +3,7 @@ "version": "0.2", "words": [ "accum", + "actioned", "adf", "AKIA", "anthropics", @@ -12,6 +13,7 @@ "askable", "ASVS", "atlassian", + "automatable", "Bitwarden", "blocklist", "blogposts", @@ -25,11 +27,13 @@ "codeBlock", "CODEOWNERS", "Confluence", + "Consolas", "CQL", "customfield", "cvss", "Dashlane", "dast", + "detekt", "docstrings", "dread", "duedate", @@ -50,6 +54,7 @@ "Gatekeeping", "GHAS", "ghsa", + "getline", "gofmt", "gradlew", "grype", @@ -60,17 +65,21 @@ "hotspots", "IDOR", "inclusivity", + "inlines", "issueIdOrKey", "issuelinks", "issuetype", "Jira", "JQL", "keyserver", + "ktlint", "lockdown", "lockfiles", "maxResults", "mcp", + "Menlo", "metacharacters", + "mockall", "modelcontextprotocol", "msword", "MVVM", @@ -78,6 +87,7 @@ "mypassword", "myproject", "Newtonsoft", + "nextest", "nextPageToken", "numstat", "NVARCHAR", @@ -94,11 +104,14 @@ "remotelink", "Rescope", "resolutiondate", + "Robolectric", "rustdoc", "sarif", + "SDET", "SDLC", "sast", "sbom", + "Segoe", "semver", "shellcheck", "shortlog", @@ -117,15 +130,22 @@ "startswith", "stride", "structurizr", + "stylesheet", + "subdirs", + "tablist", + "tabpanel", "tarpit", "thumbsup", "tinyui", + "tnum", "touchpoint", "touchpoints", "triaging", "unassigning", "unassigns", + "unfound", "ungroup", + "unlinkable", "unresponded", "unsanitized", "userflow", @@ -139,6 +159,7 @@ "wordprocessingml", "worktree", "worktrees", + "XCUI", "xoxb", "Zeroize", "zeroization", diff --git a/README.md b/README.md index bfc8c8f..5ba7c34 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ A curated collection of plugins for AI-assisted development at Bitwarden. 
Enable | [bitwarden-product-analyst](plugins/bitwarden-product-analyst/) | 0.1.5 | Product analyst agent for creating comprehensive Bitwarden requirements documents from multiple sources | | [bitwarden-security-engineer](plugins/bitwarden-security-engineer/) | 1.2.0 | Application security engineering: vulnerability triage, threat modeling, and secure code analysis | | [bitwarden-software-engineer](plugins/bitwarden-software-engineer/) | 1.0.0 | Software engineer agent for a Bitwarden product team. Implements stories, tasks, and bugs with code quality, performance, security, and team comms in mind. | +| [bitwarden-test-engineer](plugins/bitwarden-test-engineer/) | 1.0.0 | Test engineering toolkit: role-specific testing agents spanning the test lifecycle, starting with risk-weighted test strategy and coverage planning. | | [claude-config-validator](plugins/claude-config-validator/) | 1.1.1 | Validates Claude Code configuration files for security, structure, and quality | | [claude-retrospective](plugins/claude-retrospective/) | 1.1.1 | Analyze Claude Code sessions to identify successful patterns and improvement opportunities | diff --git a/plugins/bitwarden-test-engineer/.claude-plugin/plugin.json b/plugins/bitwarden-test-engineer/.claude-plugin/plugin.json new file mode 100644 index 0000000..0363e0e --- /dev/null +++ b/plugins/bitwarden-test-engineer/.claude-plugin/plugin.json @@ -0,0 +1,22 @@ +{ + "name": "bitwarden-test-engineer", + "version": "1.0.0", + "description": "Test engineering toolkit for Bitwarden. Hosts role-specific testing agents — currently a test strategist that recommends what to test, at which layer, and why (risk-weighted, shaped to each repo) and inventories existing coverage. 
Designed to grow additional roles such as an SDET or a QA engineer.", + "author": { + "name": "Bitwarden", + "url": "https://github.com/bitwarden" + }, + "homepage": "https://github.com/bitwarden/ai-plugins/tree/main/plugins/bitwarden-test-engineer", + "repository": "https://github.com/bitwarden/ai-plugins", + "keywords": [ + "testing", + "test-engineering", + "quality-engineering", + "test-strategy", + "test-automation", + "exploratory-testing", + "test-layers", + "qa", + "orchestrator" + ] +} diff --git a/plugins/bitwarden-test-engineer/CHANGELOG.md b/plugins/bitwarden-test-engineer/CHANGELOG.md new file mode 100644 index 0000000..1b109fb --- /dev/null +++ b/plugins/bitwarden-test-engineer/CHANGELOG.md @@ -0,0 +1,15 @@ +# Changelog + +All notable changes to the Bitwarden Test Engineer Plugin will be documented in this file. +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [1.0.0] - 2026-06-15 + +### Added + +- Initial release of the `bitwarden-test-engineer` plugin. +- `test-strategist` agent: classifies a change's inputs (Jira ticket, GitHub PR, tech breakdown, test-case CSV, plain-language description), fans out subagents to gather evidence, and presents a test recommendation. +- `assessing-test-coverage` skill: inventories what a change is already tested by, buckets observed tests by layer, cites them as stable GitHub permalinks, and writes a self-contained HTML coverage report. +- `analyzing-test-stack` skill: maps a change's testable behaviors to the cheapest sufficient test layer per platform, surfaces coverage gaps and shape-wrong tests, and emits a self-contained HTML report. +- Shared plugin-level `references/` and a `build-report.sh` script that splices the single shared stylesheet into each report so the two reports can't drift. 
diff --git a/plugins/bitwarden-test-engineer/README.md b/plugins/bitwarden-test-engineer/README.md new file mode 100644 index 0000000..8999d37 --- /dev/null +++ b/plugins/bitwarden-test-engineer/README.md @@ -0,0 +1,99 @@ +# Bitwarden Test Engineer Plugin + +## Overview + +A test engineering toolkit for Bitwarden. It hosts role-specific testing agents. Today it +ships one — the **test strategist** (`test-strategist`), the test-_planning_ role: +it recommends what to test, at which layer, and why, and inventories what is already tested. +It does not author, run, or maintain the tests, nor do exploratory/manual QA. The plugin is +designed to grow additional roles over time (for example an SDET or a QA engineer). + +### First role: the test strategist + +Given a change — a feature, bugfix, refactor, or migration — the agent recommends +**what to test, at which layer, and why**, shaped to **each repo's actual test practice**. +Two ideas drive it: each behavior is tested at the cheapest layer that buys the confidence it +needs (unit, integration, or E2E), and how those layers are weighted is decided per repo — a +unit-heavy pyramid (`server`, `clients`, `sdk-internal`, `android`), an integration/snapshot +trophy (`ios`), or a wholly all-E2E repo (the dedicated `test` repo, +`browser-interactions-testing`). E2E is "thin" only _within_ a platform repo; the dedicated +`test` repo is entirely E2E by design. + +It ingests whatever evidence is available — a Jira ticket (via the Atlassian MCP), a GitHub +PR (via `gh`), an exported test-case CSV, and/or a plain-language description — fans out +subagents to gather it, assesses what is **already tested** (the `assessing-test-coverage` +skill, which inventories existing tests, cites each as a GitHub permalink, and writes a +coverage report), then runs the analyst skill (`analyzing-test-stack`), which produces the +test-stack recommendation. Both skills emit a self-contained HTML report. 
+ +## Where each layer lives + +Unit and integration tests live alongside the code inside each platform repo +(e.g. `bitwarden/server`, `bitwarden/clients`, `bitwarden/ios`). **End-to-end tests live +in a dedicated, private `test` repository** — not inside the platform repos — so E2E +recommendations target that separate repo, and existing E2E coverage is treated as +unverified when that repo isn't checked out. + +## Agents + +| Agent | What It Does | +| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `test-strategist` | Classifies the inputs for a change (Jira, PR, CSV, description), fans out subagents to gather evidence, assesses existing coverage (`assessing-test-coverage`), then runs `analyzing-test-stack` — emitting a self-contained coverage report and a self-contained test-stack report. | + +## Skills + +| Skill | What It Does | +| ------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `assessing-test-coverage` | The backward-looking inventory. Determines what is **already tested** for a change — scoped to the change surface, PR-first then a targeted lookup — buckets each observed test by layer, cites it as a stable GitHub permalink, flags untested behaviors as gaps, and writes a self-contained HTML coverage report. Feeds `analyzing-test-stack`; usable standalone to audit current coverage. | +| `analyzing-test-stack` | The recommender. 
Consumes the coverage inventory, then maps each testable behavior in a change to the cheapest sufficient test layer per platform, inside each repo's actual shape, names concrete tooling, surfaces coverage gaps and shape-wrong tests (ice-cream-cone, over-testing, missing platform layers), and writes a self-contained HTML report into a per-change report directory. | + +## Cross-Plugin Integration + +| Plugin | How It's Used | +| --------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `bitwarden-atlassian-tools` | Optional but recommended. Provides the `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__*` server used to read Jira tickets and linked Confluence requirements. If absent, the plugin degrades gracefully — paste requirements or rely on the PR/CSV/description. | + +## Installation + +```bash +/plugin install bitwarden-test-engineer@bitwarden-marketplace +``` + +For Jira-backed analysis, install the Atlassian tools alongside it: + +```bash +/plugin install bitwarden-atlassian-tools@bitwarden-marketplace +``` + +## Usage + +The agent activates when you ask what test coverage a change needs, which +automation layers to add, how to shape a test plan, or whether existing tests are at the +right level: + +``` +I'm picking up PM-12345 next sprint. What test coverage should this feature have? +``` + +``` +Does bitwarden/server#5821 have the right tests, or is it leaning too hard on end-to-end? +``` + +``` +Here's our exported test cases CSV for the new item types import/export work (PM-32009) — +which of these should be automated and at what layer? 
+``` + +Each run produces a per-change directory `test-engineer-report-<slug>-<date>/` holding the +self-contained HTML reports: `coverage.html` (what is already tested — observed tests per layer, +each cited as a GitHub permalink, plus gaps), `recommended.html` (the per-platform recommendation +and its coverage-gap findings), and `combined.html` (the primary deliverable — both on one two-tab +page). Re-running on the same change and date refreshes the reports in that directory. They share +one off-brand data-report visual system so they read as the same instrument. + +## References + +- [Claude Code Agents](https://code.claude.com/docs/en/agents) +- [Claude Code Skills](https://code.claude.com/docs/en/skills) +- [The Testing Trophy](https://kentcdodds.com/blog/the-testing-trophy-and-testing-classifications) +- [Bitwarden Contributing Guidelines](https://contributing.bitwarden.com/contributing/) diff --git a/plugins/bitwarden-test-engineer/agents/test-strategist.md b/plugins/bitwarden-test-engineer/agents/test-strategist.md new file mode 100644 index 0000000..7500301 --- /dev/null +++ b/plugins/bitwarden-test-engineer/agents/test-strategist.md @@ -0,0 +1,151 @@ +--- +name: test-strategist +version: 1.0.0 +description: | + Test strategist for Bitwarden — the test-planning role, scoped to exactly the two skills it owns: (1) analyzing-test-stack, which recommends what test automation a change needs and at which layer, and (2) assessing-test-coverage, which inventories what is already tested. It produces a risk-weighted plan and a coverage inventory — it does NOT author, run, or maintain test code (a future SDET role), and does NOT perform exploratory or manual QA (a future QA-engineer role); do not delegate those to it. 
Takes a change — a feature, bugfix, refactor, or migration — described in plain language or carried in a Jira ticket, a GitHub PR, a Confluence tech breakdown, and/or an exported test-case CSV, and produces an evidence-driven recommendation for the right test automation layers (unit, integration, E2E), shaped to each repo's actual test practice rather than one universal shape, and risk-weighted by each behavior's defect severity (impact, not urgency), across Bitwarden's server, client, and mobile codebases. Use when the user asks what test coverage a change needs, which automation layers to add, how to shape a test plan, whether existing tests are over- or under-weighted, how to prioritize test coverage by risk, what tests a Critical/High bug needs, or what is already tested for a change — or asks for a "test stack" / "test strategy" / "risk-based coverage" / "coverage inventory" analysis for a ticket, PR, tech breakdown, or set of test cases. + + + Context: An engineer is about to start a Jira story and wants to know what test automation it should ship with. + user: "I'm picking up PM-12345 next sprint. What test coverage should this feature have?" + assistant: "I'll use the test-strategist agent to pull the requirements from PM-12345, map the change across the affected codebases, and produce a test-layer recommendation shaped to each affected repo." + + Jira-key intake. The agent gathers the ticket via the Atlassian MCP, then runs Skill(analyzing-test-stack) to produce the report. + + + + + Context: A reviewer wants to know whether an open PR is adequately tested at the right layers. + user: "Does bitwarden/server#5821 have the right tests, or is it leaning too hard on end-to-end?" + assistant: "I'll use the test-strategist agent to read the PR diff and its tests, assess the test shape, and check specifically for an ice-cream-cone (too E2E-heavy) anti-pattern." + + PR intake plus an explicit anti-pattern concern. 
The agent gathers the diff via gh, then runs the analyst, which assesses the test shape including the ice-cream-cone check. + + + + + Context: A QA engineer exported a set of manual test cases and wants an automation plan. + user: "Here's our exported test cases CSV for the new item types import/export work (PM-32009) — which of these should be automated and at what layer?" + assistant: "I'll use the test-strategist agent to parse the CSV, bucket the existing cases by test layer, find the gaps, and produce a layer-by-layer automation recommendation." + + CSV intake. The agent parses the export, then runs the analyst to map cases to layers and surface gaps. + + + + + Context: A tech lead just finished a tech breakdown and wants the test plan that should accompany it. + user: "I've got the tech breakdown for the new device-approval flow in Confluence — what test coverage should we plan across the stack?" + assistant: "I'll use the test-strategist agent to read the breakdown, mine its scope checklist and spec child pages for the surfaces and behaviors it touches, and produce a per-platform test-stack recommendation shaped to each repo." + + Tech-breakdown intake. The agent fetches the Confluence breakdown via the Atlassian MCP, extracts testable behaviors and the affected platforms from Part 2, then runs the analyst to emit the report. 
+ + +model: inherit +tools: + - Read + - Write + - Glob + - Grep + - Skill + - Task + - AskUserQuestion + - Bash(gh pr view:*) + - Bash(gh pr diff:*) + - Bash(gh pr checks:*) + - Bash(git diff:*) + - Bash(git log:*) + - Bash(git rev-parse:*) + - Bash(git remote get-url:*) + - Bash(git -C * rev-parse:*) + - Bash(git -C * remote get-url:*) + - Bash(${CLAUDE_PLUGIN_ROOT}/scripts/build-report.sh:*) + - mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue + - mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__search_issues + - mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue_comments + - mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue_remote_links + - mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_confluence_page + - mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__search_confluence + - mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__search_confluence_cql +skills: + - assessing-test-coverage + - analyzing-test-stack +color: green +--- + +You are the **test strategist** for Bitwarden — the test-planning role. Your job: take a change — a feature, bugfix, refactor, or migration — and say **what to test, at which layer, and why**. You recommend the plan and inventory existing coverage; you do not author, run, or maintain the tests, nor run exploratory/manual QA — those are separate roles this plugin may grow into later. + +You produce a recommendation — an HTML report — not the tests themselves. Ground every layer call in evidence; a test plan drifts toward whatever is easiest to write rather than what buys confidence, so keep each repo's shape honest. + +## Operating context + +A single feature frequently spans several repos (a server endpoint + a web client + a mobile screen), each shaped independently — match the recommendation to each repo's actual practice, not a house style. 
**Unit and integration live alongside the code in each platform repo; E2E lives in the dedicated `test` repo** (a sibling of the platform repos). The per-platform stack and the layer→repo map are in `${CLAUDE_PLUGIN_ROOT}/skills/analyzing-test-stack/references/monorepo-layout.md`. + +Atlassian capabilities depend on the **`bitwarden-atlassian-tools`** plugin (the `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__*` server). If it is absent and the user references a Jira issue or Confluence breakdown, don't fail — say the MCP is unavailable and ask the user to paste the requirements, or proceed from the PR / CSV / description provided. + +## Workflow + +Classify what the request needs and dispatch to the matching skill(s) — each skill runs standalone: + +- _"What's already tested for this PR?"_ → `Skill(assessing-test-coverage)` alone. +- _"What layers should this change ship with?"_ → `Skill(analyzing-test-stack)` (it pulls its own coverage inventory if none is supplied). +- A full test plan / test-stack analysis → the **coverage → recommendation pipeline** below, run in sequence (the coverage inventory feeds the recommendation). + +The steps below specify that pipeline end to end. + +### 1. Intake and scope + +Classify every input supplied — Jira key, GitHub PR, Confluence tech breakdown (page ID/URL or feature/team name), CSV path, plain-language description. Inputs are additive; handle any combination. Per-source ingestion (Epic expansion, breakdown mining, CSV column mapping) lives in `${CLAUDE_PLUGIN_ROOT}/references/input-sources.md` — don't re-derive it. Then determine the **affected repos/platforms**: if scope is genuinely ambiguous and it changes the recommendation, use `AskUserQuestion`; otherwise infer and state your assumption. + +### 2. Fan out to gather evidence + +Spawn `Task` subagents **in parallel**, one per evidence source or affected repo, so your context stays lean. 
Each returns a compact structured digest, not raw dumps: + +- **Requirements reader** (`sonnet`) — resolves the Jira issue into testable behaviors and acceptance criteria, expanding Epics/Features to their children, feeding linked PR URLs to the PR analyzer, and capturing the bug **severity** and each behavior's **source issue key + browse URL**. Follows `${CLAUDE_PLUGIN_ROOT}/references/input-sources.md` → _Epic intake_ and _Citing Jira issues as links_. +- **Breakdown reader** (`sonnet`) — fetches the tech breakdown, mines Part 2's scope checklist for surfaces, Part 4 spec pages for interfaces, and Part 5 open questions for untestable-requirement risk. Returns testable behaviors per platform plus the breakdown's status. +- **PR diff analyzer** (`sonnet`) — `gh pr diff` / `gh pr view` for the change surface, public API touched, and tests already present. +- **CSV parser** (`haiku`) — buckets existing cases by apparent layer and automation status. + +Give each subagent one source and a tight output contract; skip any branch whose input wasn't supplied. **Set each subagent's model explicitly** (see _Model selection and context discipline_) — never let a digest-returning subagent inherit your model. + +### 3. Assess existing coverage + +Once the change surface is known (step 2), determine what is **already tested** before recommending anything. Fan out a **per-repo coverage scout** (`sonnet`) per affected repo, each applying the `assessing-test-coverage` skill — the record shape, discovery rules, per-behavior discipline, and permalink recipe live in `${CLAUDE_PLUGIN_ROOT}/skills/assessing-test-coverage/references/finding-coverage.md`; scouts follow it. Each returns one record per behavior plus `unverified` gaps. Merge the scouts' records into one inventory. + +Then invoke `Skill(assessing-test-coverage)` with the merged inventory and today's date to produce the coverage inventory and the **self-contained HTML coverage report**. 
Per the skill, the HTML _rendering_ is delegated to the Sonnet **report-writer subagent** — only the gathering and merge happen in your context. Skills can't read the clock; pass today's date, and the build script writes the report into the per-change `test-engineer-report-<slug>-<date>/` directory. + +### 4. Recommend + +Invoke `Skill(analyzing-test-stack)` with the digests **and the coverage inventory from step 3**. The behavior→layer mapping is the genuinely hard reasoning and **stays in your context** — map each behavior to the cheapest sufficient layer per platform, risk-weighted by severity, and surface gaps and shape-wrong tests (ice-cream-cone, mislabeled layers, ungrounded coverage claims) ordered by severity; the skill and its `references/` own how. Once the mapping is decided, rendering it to the **self-contained HTML report** is mechanical and is delegated to the Sonnet **report-writer subagent** — hand it the decided per-behavior records (each carrying its `source_issue` from intake) and your `#overview` synthesis. + +### 5. Combine and present + +Steps 3 and 4 each write their report into the per-change directory `test-engineer-report-<slug>-<date>/` — `coverage.html` and `recommended.html`. Assemble the **combined two-tab page** — the primary deliverable, _Current coverage_ + _Recommended coverage_ on one page — yourself with the build script (pure file assembly, no template or stylesheet reading, so your context stays lean): + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/build-report.sh" \ + --kind test-combined --slug <slug> --date <date> \ + --current test-engineer-report-<slug>-<date>/coverage.html \ + --recommended test-engineer-report-<slug>-<date>/recommended.html +``` + +The paths are deterministic under the per-change directory (and the prior steps print them); the two standalone reports are read, not modified, and `combined.html` lands beside them. 
Then mirror the test-stack report's `#overview` in chat — recommended shape per platform, the top open risks to resolve before committing to the plan, and any coverage the analyst couldn't verify — and point the user at `test-engineer-report-<slug>-<date>/combined.html` first (both standalone reports remain available for sharing a single view). + +## Principles + +These govern the orchestration; the per-skill principles live in the two skills. + +- **Coverage before recommendation.** Assess what exists (step 3) before mapping new layers (step 4); the recommendation is incremental against observed coverage, not absolute. +- **Degrade gracefully.** A missing input (no MCP, no PR, no CSV, no `test` checkout) narrows the analysis; it never blocks it. State what you couldn't see. + +## Model selection and context discipline + +You **inherit the session model** for your own context — the orchestration and the hard behavior→layer/severity reasoning, where a wrong call is expensive to act on, stay with you. Everything you fan out is evidence-gathering or mechanical rendering and runs on an **explicitly pinned** cheaper model — never inherit: + +- **Evidence subagents** (step 2) — `sonnet` for anything reading a diff, ticket, or repo; `haiku` for pure CSV parsing. +- **Coverage scouts** (step 3) — `sonnet`. +- **Report-writer** — `sonnet`. Once the inventory (step 3) and the mapping (step 4) are decided, rendering to HTML is mechanical: the report-writer authors the content fragment per the skill's template and runs `build-report.sh` to splice in the stylesheet. + +Keep your own context lean — it is the most expensive token pool and is re-cached every turn: + +- **Never read the rendering files** (`html-report-template.md`, `coverage-report-template.md`, `report-template-common.md`, `report-style-tokens.md`, `report-style.css`, `build-report.sh`) — they are the report-writer's concern. 
You need only the reasoning references (`test-layers.md`, `severity-risk.md`, `monorepo-layout.md`, `input-sources.md`, and `finding-coverage.md` for the contract). The step-5 combined build is the one time you _invoke_ `build-report.sh` — on the two finished filenames; you still never read its source. +- **Don't echo digests.** Synthesize subagent digests into the decision; keep inter-step narration to a few lines. The reports are the deliverable. +- **Hand off by the smallest payload.** Pass report-writers the compact per-behavior records and the `#overview` text; if a record set is large, `Write` it to a temp file (e.g. `./.test-engineer-<slug>.json`) and pass the path. diff --git a/plugins/bitwarden-test-engineer/references/input-sources.md b/plugins/bitwarden-test-engineer/references/input-sources.md new file mode 100644 index 0000000..d0342cc --- /dev/null +++ b/plugins/bitwarden-test-engineer/references/input-sources.md @@ -0,0 +1,170 @@ +# Ingesting evidence sources + +Inputs are additive — handle any combination, and record in the report which sources were +present and which were missing. Never block on a missing source. + +## Jira ticket + +Preferred: if the `bitwarden-atlassian-tools` plugin is installed, invoke +`Skill(bitwarden-atlassian-tools:researching-jira-issues)` for a deep, link-following read. + +Otherwise use the MCP tools directly: + +- `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue` — the issue itself (summary, description, + acceptance criteria, custom fields). +- `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue_comments` — clarifications and edge cases raised in + discussion. +- `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue_remote_links` — linked Confluence pages and PRs. +- `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_confluence_page` — linked requirements/design docs. 
+ +Extract: discrete **testable behaviors**, **acceptance criteria**, and the **platforms/ +components** named. If the MCP is unavailable, ask the user to paste the requirements. + +For every issue, also capture its **key and browse URL** and **carry the originating key with each +behavior you extract**, so the report can link every behavior back to its source — link form and the +no-Jira-source case are in _Citing Jira issues as links_ below. + +Also capture each behavior's **severity** and carry it through with the behavior. Where it comes +from (a bug's Jira severity field vs. assessed risk for a feature) and how it weights coverage are +owned by `analyzing-test-stack`'s `references/severity-risk.md`. + +### Epic intake + +A Jira key may resolve to an Epic (or, in next-gen projects, a Feature) rather than a single +story. The epic body itself rarely lists testable behaviors — those live on its children +and on the PRs the children produce. If you analyze only the epic, you will under-scope the +analysis. So when the `issuetype` on the `get_issue` response is `Epic` or `Feature`, expand +before extracting: + +1. **Discover children.** Read the `subtasks` field first. If empty (common in next-gen + projects, which use `parent` relationships rather than the legacy `subtasks` field), fall + back to `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__search_issues` with JQL `parent = <EPIC-KEY>`. On + classic projects, also try `"Epic Link" = <EPIC-KEY>`. Together these cover both schemas. +2. **Bound the fan-out.** If the epic has more than ~10 children, fetch the first 10 in full + and summarize the rest as a one-line list (key, status, summary) from the search results. + This matches the depth-control discipline in + `bitwarden-atlassian-tools:researching-jira-issues` (Steps 2–3) — re-use that recipe; do + not re-derive it. +3. 
**Per child, gather behaviors and PRs.** + - `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue` for the child's description and acceptance criteria — + these are the testable behaviors. Carry each child's **key and browse URL** with the behaviors + it produces — a behavior sourced from a child links to that child, not the epic. + - `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_issue_remote_links` for PRs (grouped under "GitHub"). Each PR URL + feeds the **GitHub PR** branch below (`gh pr view` / `gh pr diff`). **These merged/linked PRs + are the reliable backbone for existing coverage** — they carry the tests that shipped and the + PR head SHA makes each permalink-ready (see `finding-coverage.md` → _Finding existing + coverage_). If `gh` cannot reach a PR (private fork, not authenticated, repo inaccessible), + record the URL as evidence-not-inspected rather than dropping it. +4. **Track epic status.** The epic's status (`In Planning`/`In Progress`/`Done`) tells you how much + is shipped: `Done` children with merged PRs likely have tests-in-PR to audit; `To Do` children + are scope-only and the recommendation is prospective. Surface this in the report's Evidence. +5. **Preferred path.** The `researching-jira-issues` skill (preferred at the top of this file) does + this hierarchical discovery and depth-controlled traversal in one synthesized read — run it on the + epic key; the direct MCP calls above are the fallback. + +## GitHub PR + +- `gh pr view <pr> --json url,headRefOid,baseRefName,title,body,files,state` — title, + body, linked issues, files changed, **and the head SHA + `owner/repo`** needed for + permalink production downstream. +- `gh pr diff <pr>` — the actual change surface. 
+ +Extract: the public API / behavior touched, the diff paths (→ which repos/platforms), +**any tests already included in the PR** (so you assess incremental, not absolute, +gaps), and the captured **`headRefOid`** + **`owner/repo`** (parsed from the PR URL). +The SHA and `owner/repo` are required — they are what makes every test cited as +existing coverage clickable in the report. Tests observed in the PR diff are primary +coverage evidence; for _pre-existing_ tests not in the diff, do a targeted lookup scoped +to the changed paths/symbols rather than a repo-wide sweep. See the +`assessing-test-coverage` skill's `references/finding-coverage.md` → _Finding existing +coverage_ and _Citing tests as GitHub permalinks_ for the link form and the fallback when +ingredients are missing. + +## Technical breakdown document + +A Bitwarden **Tech Breakdown** — the Confluence artifact a team produces before implementation, +authored with the `bitwarden-delivery-tools:writing-tech-breakdowns` skill. It is the richest +single input for this analysis, because a good breakdown has already done the cross-platform +scoping you would otherwise reconstruct from a diff or a ticket. Mine it; don't re-derive it. + +Locate and fetch it: + +- If given a page ID or URL, fetch directly with `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__get_confluence_page`. +- If given only a feature/team name, find the page first with `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__search_confluence` + or `mcp__plugin_bitwarden-atlassian-tools_bitwarden-atlassian__search_confluence_cql` (breakdowns live in a team's "Tech Breakdown" + folder), then fetch it. +- The breakdown's **status** matters: `IN PLANNING` / `IN PROGRESS` means the scope may still + shift — note that the recommendation rests on a draft. `PROPOSED` / `ACCEPTED` is a stable + basis. Record the status as part of the evidence. 
+ +Map its structure to testable evidence (the canonical template is page `2920349776`): + +- **Part 1 — Problem overview**: the feature framing and linked Jira epic. Use it for scope and + to cross-link any Jira/PR inputs, not as a behavior source on its own. **When Part 1 names an + Epic**, treat it the same as an Epic-key intake — drill into its children and their PR remote + links per the _Epic intake_ recipe above. A breakdown plus its epic together usually surface + more testable behavior than either alone. +- **Part 2 — Breakdown scope checklist**: the core of the mining. Each answered item names a + surface the change touches and therefore a place tests are needed — **Database changes** + (migration/backwards-compat behaviors, EDD phasing), **API changes** (endpoint contracts, + V±2 compatibility, any unauthenticated endpoint), **UI components** (shared/base components), + **SDK changes**, **Services touched**, **Hosting** (Self-Hosted vs Cloud paths), + **Feature flagging** (flag-on/flag-off states to cover), and **Security considerations** + (crypto, threat-model-relevant behaviors). The **Testing considerations** item is the team's + own stated test intent — treat it as a claim to assess, not as ground truth + to copy. +- **Part 4 — Specification artifacts**: linked child pages defining concrete interfaces (API + contracts, schemas, component APIs, crypto schemes). Fetch the relevant ones with + `get_confluence_page`; their public interfaces and edge cases are exactly what integration and + unit tests pin down. +- **Part 5 — Open questions**: unresolved questions are untestable-requirement risk — a behavior + can't be reliably tested until its question is answered. Surface them in the report's gaps. + +Extract: discrete **testable behaviors** per platform, the **surfaces** each touches (→ repos via +the `analyzing-test-stack` skill's `references/monorepo-layout.md`), and the team's **stated testing +intent** (to evaluate, not echo). 
Where the +breakdown's scope checklist disagrees with a diff or ticket you were also given, treat the +divergence as a finding rather than silently picking one. + +## Test-case CSV export + +A CSV export of existing or planned test cases. Column headers vary by tool and export +settings — **do not hardcode them**. Read the header row, then map by meaning: + +- A **title / case** column — the scenario name. +- A **type** column (e.g. "Regression", "Smoke", "Functional") — hints at intended layer. +- An **automation status** column (e.g. "Ready to Automate", "Automated", "Manual") — + what already exists vs. what's planned. +- A **steps / expected-result** column, often in Given–When–Then form — the behavior. +- Optional **team / area / tags / preconditions** columns — scope and grouping. + +Map rows to behaviors and bucket each by apparent layer using the `analyzing-test-stack` skill's `references/test-layers.md`: + +- A case that drives the full UI through a complete journey → likely **E2E** (target the + dedicated `test` repo). +- A case asserting one service/component's behavior through its collaborators → + **integration**. +- A case pinning a single function's logic or an edge case → **unit**. + +Flag cases that are currently manual but cheaply automatable at a lower layer, and cases +slated for E2E that would be better as integration. If a column's meaning is ambiguous, +state the interpretation you used rather than guessing silently. + +## Citing Jira issues as links + +Every Jira item the report **names**, and every behavior **found from a Jira item**, is rendered as +a clickable link — never bare key text. This is the Jira counterpart to the GitHub permalink rule +for tests (`finding-coverage.md` → _Citing tests as GitHub permalinks_). + +The link form is the issue's browse URL `https://bitwarden.atlassian.net/browse/<KEY>` (e.g. +`PM-1234`). Prefer the URL the MCP tool or `researching-jira-issues` skill returns; else construct it +from the key. 
The same rule covers epics and their children — link each to its own key. Apply it: + +- An **issue, epic, or child key** named in Overview/Summary/Evidence — anchor the key: + `<a href="https://bitwarden.atlassian.net/browse/PM-1234">PM-1234</a>`. +- A **behavior row** (recommendations/coverage/gaps) extracted from a Jira item — append the linked + source key to the behavior cell. A behavior with no Jira source (PR-only) carries none. + +These are informational `<a href>` citations (text, not loaded assets), so they don't violate the +self-contained constraint. Never fabricate a key or URL — if a key is unknown, name the source in +plain text rather than inventing a link. diff --git a/plugins/bitwarden-test-engineer/references/report-style-tokens.md b/plugins/bitwarden-test-engineer/references/report-style-tokens.md new file mode 100644 index 0000000..013c469 --- /dev/null +++ b/plugins/bitwarden-test-engineer/references/report-style-tokens.md @@ -0,0 +1,131 @@ +# Report style tokens — data-report visual system for HTML reports + +The **visual system** for every self-contained HTML report the `bitwarden-test-engineer` plugin +emits — the `analyzing-test-stack` test-stack report and the `assessing-test-coverage` coverage +report alike. Because the output is a single file with no external assets, the stylesheet is +inlined; both reports splice the **same** canonical CSS so they read as one instrument and cannot +drift. + +**You never retype, prune, or hand-edit the stylesheet.** It lives as a real file at +`report-style.css` (alongside this file) and is spliced into the report by `scripts/build-report.sh` +— never reproduced as model output. Authoring a report means writing its **content** (the sections +below) into a fragment whose ` +``` + +Write that fragment to a temporary path (e.g. 
`<kind>-report-<slug>.fragment.html`), then run the +build script from the plugin root: + +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/build-report.sh" \ + --kind <test-stack|test-coverage> --slug <slug> --date <date> \ + <fragment.html> +``` + +It replaces the sentinel with `report-style.css` verbatim and writes the report into a per-change +directory `test-engineer-report-<slug>-<date>/` (created if needed) — the coverage report as +`coverage.html`, the test-stack report as `recommended.html` — then prints the final path. The +directory name derives only from `--slug`/`--date`, so a run's reports share one folder; +**re-running the same change on the same date refreshes the report in place**. Delete the temporary +fragment afterward. If the script errors (missing sentinel, bad `--kind`/`--date`, fragment not +found) it writes nothing — fix the fragment and re-run rather than pasting CSS by hand. + +**Combined two-tab page (assembled, not authored).** When both reports exist for one change, the +build script can stitch them into one page with two CSS-only tabs — _Current coverage_ and +_Recommended coverage_. This is a presentation-only merge from the two finished report files: no +skill or template knows about tabs, and the agent (not the report author) runs it with +`--kind test-combined --current test-engineer-report-<slug>-<date>/coverage.html --recommended test-engineer-report-<slug>-<date>/recommended.html`, +which writes `combined.html` into that same directory. The tab chrome lives entirely in the build +script and `report-style.css`. + +## What not to do + +- Do not reintroduce a brand skin — no saturated brand colors, no logo images, no `<link>` to a + design system. The report is intentionally off-brand and self-contained. +- Do not swap the sequential layer ramp for unrelated categorical hues; the order is the encoding. +- Do not paste, retype, or trim the stylesheet into the fragment — the fragment carries only the + sentinel. A report that ships a hand-copied or "only the classes I used" stylesheet is exactly how + two reports drift apart. 
+- Do not hand-compute distribution bar widths — set `flex: <count>` per segment. diff --git a/plugins/bitwarden-test-engineer/references/report-style.css b/plugins/bitwarden-test-engineer/references/report-style.css new file mode 100644 index 0000000..ad98427 --- /dev/null +++ b/plugins/bitwarden-test-engineer/references/report-style.css @@ -0,0 +1,552 @@ +:root { + /* Surfaces & ink — flat paper, no cards or shadows */ + --paper: #ffffff; + --panel: #f4f6f8; + --ink: #16191d; + --ink-soft: #585f68; + --ink-faint: #818892; + --rule: #e4e7ea; + + /* Layer ramp — SEQUENTIAL: ordered cheap/shallow -> costly/deep */ + --unit: #8fb3d1; + --integration: #3f7196; + --e2e: #1d3a54; + --on-unit: #16191d; /* --unit is light: use dark text */ + --on-deep: #ffffff; /* white text on integration/e2e */ + + /* Verdict & state — muted categorical */ + --ok: #43875a; + --warn: #b07d2f; + --bad: #bf564a; + --on-state: #ffffff; + + --link: #2f6e9e; + + --sans: + system-ui, -apple-system, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; + --mono: + ui-monospace, "SF Mono", SFMono-Regular, Menlo, Consolas, "Liberation Mono", + monospace; +} + +* { + box-sizing: border-box; +} +html { + -webkit-text-size-adjust: 100%; + scroll-padding-top: 24px; /* keep anchored sections clear of the top edge */ +} + +body { + margin: 0; + background: var(--paper); + color: var(--ink); + font: 15px/1.6 var(--sans); + font-feature-settings: "tnum" 1; /* tabular figures where supported */ + -webkit-font-smoothing: antialiased; + text-rendering: optimizeLegibility; +} + +/* Smooth in-page jumps for the report's overview -> section anchor links, + suppressed when the reader prefers reduced motion. 
*/ +@media (prefers-reduced-motion: no-preference) { + html { + scroll-behavior: smooth; + } +} + +a { + color: var(--link); + text-decoration: underline; + text-underline-offset: 2px; + text-decoration-thickness: 1px; +} +a:focus-visible, +summary:focus-visible { + outline: 2px solid var(--link); + outline-offset: 2px; +} + +/* Masthead */ +header { + max-width: 60rem; + margin: 0 auto; + padding: clamp(36px, 7vw, 56px) clamp(20px, 5vw, 32px) 28px; +} +header .eyebrow { + margin: 0 0 14px; + font: 600 11px/1 var(--mono); + letter-spacing: 0.18em; + text-transform: uppercase; + color: var(--ink-faint); +} +header h1 { + margin: 0 0 12px; + font-size: clamp(24px, 5vw, 32px); + line-height: 1.2; + font-weight: 650; + letter-spacing: -0.01em; + text-wrap: balance; +} +header .meta { + font: 12px/1.6 var(--mono); + color: var(--ink-soft); +} +header .meta a { + color: var(--ink-soft); +} + +/* In-page table of contents — a compact monospace row of section links at the + top of
. In the combined report the build script namespaces each link's + href per tab, so a panel's ToC jumps within its own panel. */ +.toc { + display: flex; + flex-wrap: wrap; + gap: 6px 18px; + margin: 0 0 4px; + padding: 0 0 20px; + border-bottom: 1px solid var(--rule); + font: 600 11px/1.6 var(--mono); + letter-spacing: 0.08em; + text-transform: uppercase; +} +.toc a { + color: var(--ink-soft); + text-decoration: none; +} +.toc a:hover { + color: var(--link); + text-decoration: underline; +} + +/* Sections — flat, hairline-separated, auto-numbered */ +main { + max-width: 60rem; + margin: 0 auto; + padding: 0 clamp(20px, 5vw, 32px) 96px; + counter-reset: sec; +} +section { + counter-increment: sec; + padding: 36px 0; + border-top: 1px solid var(--rule); + scroll-margin-top: 24px; +} +section:first-of-type { + border-top: 0; +} +/* Quiet landing cue: briefly tint a section an in-page link jumped to. */ +@media (prefers-reduced-motion: no-preference) { + section:target { + animation: section-land 1.4s ease-out; + } + @keyframes section-land { + from { + background: var(--panel); + } + to { + background: transparent; + } + } +} +section > h2 { + margin: 0 0 18px; + font-size: 19px; + font-weight: 650; + letter-spacing: -0.01em; + text-wrap: balance; +} +section > h2::before { + content: counter(sec, decimal-leading-zero); + display: inline-block; + margin-right: 12px; + font: 600 12px/1 var(--mono); + letter-spacing: 0.1em; + color: var(--ink-faint); + vertical-align: 2px; +} +section h3 { + margin: 28px 0 10px; + font: 600 11px/1.3 var(--mono); + letter-spacing: 0.12em; + text-transform: uppercase; + color: var(--ink-soft); +} + +/* Prose */ +p { + margin: 0 0 14px; + max-width: 72ch; + text-wrap: pretty; /* avoid orphans / ragged short last lines */ +} +.lead { + font-size: 16px; +} +.small { + font-size: 12.5px; + color: var(--ink-soft); +} +ul.tight { + margin: 8px 0 16px; + padding-left: 20px; +} +ul.tight li { + margin: 0 0 6px; +} +ol { + padding-left: 22px; +} 
+ol li { + margin: 0 0 10px; +} +code { + font: 0.86em var(--mono); + background: var(--panel); + padding: 1px 5px; + border-radius: 3px; +} + +/* Tables — heavy header rule, hairline rows */ +.scroll { + overflow-x: auto; + -webkit-overflow-scrolling: touch; + overscroll-behavior-x: contain; +} +table { + width: 100%; + border-collapse: collapse; + margin: 4px 0 18px; + font-size: 13.5px; +} +thead th { + text-align: left; + vertical-align: bottom; + padding: 0 12px 8px; + font: 600 10.5px/1.3 var(--mono); + letter-spacing: 0.1em; + text-transform: uppercase; + color: var(--ink-faint); + border-bottom: 1px solid var(--ink); +} +tbody td { + vertical-align: top; + padding: 10px 12px; + border-bottom: 1px solid var(--rule); +} +tbody tr:hover { + background: var(--panel); +} +th:first-child, +td:first-child { + padding-left: 0; +} +th:last-child, +td:last-child { + padding-right: 0; +} + +/* Layer chip */ +.layer { + display: inline-block; + font: 600 10.5px/1.6 var(--mono); + letter-spacing: 0.08em; + text-transform: uppercase; + padding: 2px 8px; + border-radius: 2px; + white-space: nowrap; +} +.layer.unit { + background: var(--unit); + color: var(--on-unit); +} +.layer.integration { + background: var(--integration); + color: var(--on-deep); +} +.layer.e2e { + background: var(--e2e); + color: var(--on-deep); +} + +/* Layer-distribution chart (the signature graphic) */ +figure { + margin: 18px 0; +} +figcaption { + margin-bottom: 14px; + font: 11px/1.4 var(--mono); + letter-spacing: 0.04em; + color: var(--ink-faint); +} +.dist .legend { + display: flex; + flex-wrap: wrap; + gap: 18px; + margin-bottom: 14px; + font: 11px/1 var(--mono); + color: var(--ink-soft); +} +.dist .legend .key { + display: inline-flex; + align-items: center; + gap: 6px; + text-transform: uppercase; + letter-spacing: 0.06em; +} +.dist .legend .key::before { + content: ""; + width: 10px; + height: 10px; + border-radius: 2px; + background: var(--rule); +} +.dist .legend .unit::before { + 
background: var(--unit); +} +.dist .legend .integration::before { + background: var(--integration); +} +.dist .legend .e2e::before { + background: var(--e2e); +} +.dist-row { + display: flex; + align-items: center; + gap: 14px; + margin: 7px 0; +} +/* Fixed-width platform label; long unbroken names (e.g. owner/repo) may wrap at any point. */ +.dist-row .dist-label { + flex: 0 0 14ch; + text-align: right; + font: 11px/1.3 var(--mono); + color: var(--ink-soft); + overflow-wrap: anywhere; /* standard equivalent of the deprecated word-break: break-word */ +} +.dist-row .bar { + flex: 1; + display: flex; + height: 24px; + background: var(--panel); + border-radius: 3px; + overflow: hidden; +} +.bar .seg { + display: flex; + align-items: center; + justify-content: center; + min-width: 18px; + font: 600 11px/1 var(--mono); + color: var(--on-deep); +} +.bar .seg.unit { + background: var(--unit); + color: var(--on-unit); +} +.bar .seg.integration { + background: var(--integration); +} +.bar .seg.e2e { + background: var(--e2e); +} + +/* Per-platform recommended-shape list (replaces card blocks) */ +ul.shapes { + margin: 6px 0 0; + padding: 0; + list-style: none; +} +ul.shapes li { + padding: 10px 0; + border-top: 1px solid var(--rule); +} +ul.shapes li:first-child { + border-top: 0; +} +ul.shapes .plat { + font: 600 13px/1.5 var(--mono); +} + +/* Badges */ +.badge { + display: inline-block; + font: 600 10px/1.5 var(--mono); + letter-spacing: 0.04em; + text-transform: uppercase; + padding: 1px 6px; + border-radius: 2px; + color: var(--on-state); + white-space: nowrap; +} +.badge.assumption { + background: var(--warn); +} +.badge.warn { + background: var(--bad); +} +.badge.ok { + background: var(--ok); +} + +/* Unlinkable evidence */ +.unlinkable { + font: italic 12px/1.4 var(--mono); + color: var(--ink-faint); +} + +/* Tabbed combined report — the Current-coverage and Recommended-coverage report + bodies surfaced as two tabs on one page, CSS-only (no JavaScript). The radio + inputs are visually hidden but keep keyboard focus; the checked input drives + both the active label and which panel shows. 
These rules are only exercised by + the combined report; they are inert in the standalone coverage/test-stack + reports, which never emit these elements. */ +.tab-input { + position: absolute; + width: 1px; + height: 1px; + margin: -1px; + opacity: 0; +} +.tablist { + max-width: 60rem; + margin: 0 auto; + padding: 0 clamp(20px, 5vw, 32px); + display: flex; + flex-wrap: wrap; + gap: 4px; + border-bottom: 1px solid var(--ink); +} +.tablist label { + display: inline-block; + padding: 11px 16px; + font: 600 11px/1.4 var(--mono); + letter-spacing: 0.1em; + text-transform: uppercase; + color: var(--ink-faint); + cursor: pointer; + border: 1px solid transparent; + border-bottom: 0; + border-radius: 3px 3px 0 0; + margin-bottom: -1px; /* sit the tab on the list's bottom rule */ +} +.tablist label:hover { + color: var(--ink); + background: var(--panel); +} +/* A tabpanel is itself a section element; neutralize the global section chrome + so only the report sections nested inside its main element render with rules + and numbering. */ +.tabpanel { + display: none; + padding: 0; + border-top: 0; + counter-increment: none; +} +/* Active tab + its panel, driven by the checked radio (general-sibling ~). */ +#tab-current:checked ~ .tablist label[for="tab-current"], +#tab-recommended:checked ~ .tablist label[for="tab-recommended"] { + color: var(--ink); + border-color: var(--ink); + border-bottom-color: var(--paper); + background: var(--paper); +} +#tab-current:checked ~ .tabpanel[data-panel="current"], +#tab-recommended:checked ~ .tabpanel[data-panel="recommended"] { + display: block; +} +/* Keyboard focus on the visually-hidden radio surfaces a ring on its label. 
*/ +#tab-current:focus-visible ~ .tablist label[for="tab-current"], +#tab-recommended:focus-visible ~ .tablist label[for="tab-recommended"] { + outline: 2px solid var(--link); + outline-offset: -2px; +} + +/* Floating "back to top" control — a fixed action button that rides along as the + reader scrolls and jumps to the top via the in-page #top anchor on
. No + JavaScript: it reuses the same smooth-scroll / reduced-motion behavior as the ToC + links. Flat to fit the data-report system — a solid ink fill carries it over the + content instead of a shadow. Present in every report; hidden when printing. */ +.to-top { + position: fixed; + right: clamp(16px, 4vw, 28px); + bottom: clamp(16px, 4vw, 28px); + z-index: 20; + display: inline-flex; + align-items: center; + gap: 6px; + padding: 9px 13px; + background: var(--ink); + color: var(--paper); + font: 600 10.5px/1 var(--mono); + letter-spacing: 0.1em; + text-transform: uppercase; + text-decoration: none; + border-radius: 4px; +} +.to-top::before { + content: "\2191"; /* upwards arrow */ + font-size: 13px; + line-height: 1; +} +.to-top:hover { + background: var(--link); + color: var(--paper); +} +.to-top:focus-visible { + outline: 2px solid var(--link); + outline-offset: 2px; +} + +@media (max-width: 720px) { + header, + main, + .tablist { + padding-left: 20px; + padding-right: 20px; + } + .dist-row { + flex-direction: column; + align-items: stretch; + gap: 4px; + } + .dist-row .dist-label { + flex: none; + text-align: left; + } +} + +@media print { + body { + font-size: 11pt; + } + /* Tabs cannot be toggled on paper — drop the controls and stack both report + bodies, each titled by its panel label so the printout stays legible. 
*/ + .tab-input, + .tablist, + .to-top { + display: none; + } + .tabpanel { + display: block !important; + } + .tabpanel::before { + content: attr(aria-label); + display: block; + max-width: 60rem; + margin: 0 auto; + padding: 16px clamp(20px, 5vw, 32px) 0; + font: 600 11px/1.3 var(--mono); + letter-spacing: 0.12em; + text-transform: uppercase; + color: var(--ink-faint); + } + section { + break-inside: avoid; + border-top-color: #ccc; + } + tbody tr:hover { + background: none; + } + a { + color: var(--ink); + } +} diff --git a/plugins/bitwarden-test-engineer/references/report-template-common.md b/plugins/bitwarden-test-engineer/references/report-template-common.md new file mode 100644 index 0000000..05877a9 --- /dev/null +++ b/plugins/bitwarden-test-engineer/references/report-template-common.md @@ -0,0 +1,152 @@ +# Report HTML — shared authoring contract + +Both self-contained HTML reports the `bitwarden-test-engineer` plugin emits — the +`analyzing-test-stack` **test-stack report** and the `assessing-test-coverage` **coverage report** — +are authored against this shared contract, so the two read as one instrument. Each skill's own +template (`html-report-template.md` / `coverage-report-template.md`) covers only what differs: its +section set, its per-platform table columns, and its recommend-vs-inventory framing. **Read this +file first, then that template.** + +## Output constraints + +Produce a **single self-contained HTML file**: all CSS inline in ` + + +
+

…report title…

+

…the change under analysis…

+

…ticket/PR · status · team · date…

+
+
+ +
+

Overview

+ …synthesis: recap per platform; top 3 items; anchor links into the + detail sections… +
+
+

…summary heading…

+ …2–4 sentences… +
+
Fig 1 · …layer distribution by platform…
+
+ unit + integration + e2e +
+
+ bitwarden/server +
+ 3 + 11 + 1 +
+
+ +
+
    +
  • bitwarden/server — …one-line shape…
  • + +
+
+
+

Evidence & sources

+
+ …sources used + what was missing + commit SHA(s)… +
+
+ +
+

…gaps heading…

+ …per your template… +
+
+
Top + + +``` diff --git a/plugins/bitwarden-test-engineer/scripts/build-report.sh b/plugins/bitwarden-test-engineer/scripts/build-report.sh new file mode 100755 index 0000000..8142459 --- /dev/null +++ b/plugins/bitwarden-test-engineer/scripts/build-report.sh @@ -0,0 +1,213 @@ +#!/usr/bin/env bash +# +# build-report.sh — assemble a self-contained HTML report for the +# bitwarden-test-engineer plugin by splicing the canonical stylesheet into a +# model-authored content fragment. +# +# The model writes a fragment whose + + +HTML + # Shared masthead: reuse the recommendation report's header, relabel its + # eyebrow so the page reads as the combined deliverable, not one report. + extract_region "$RECOMMENDED" "" \ + | sed -E 's#(

)[^<]*(

)#\1Test Engineering Report\2#' + cat <<'HTML' + + + +
+HTML + extract_region "$CURRENT" "" | prefix_ids cur + cat <<'HTML' +
+
+HTML + extract_region "$RECOMMENDED" "" | prefix_ids rec + # The reused masthead carries id="top"; emit the back-to-top control once for + # the whole page. Each standalone report's own control sits after its
, + # outside the extracted region, so the combined page would otherwise have none. + cat <<'HTML' + + Top + + +HTML + } | splice_css > "$OUT" + + echo "$OUT" + exit 0 +fi + +# --- single report (test-stack | test-coverage) ------------------------------ +if [[ -z "$FRAGMENT" || ! -f "$FRAGMENT" ]]; then + echo "build-report.sh: fragment HTML file not found: '${FRAGMENT}'" >&2 + exit 2 +fi +if ! grep -qF "$SENTINEL" "$FRAGMENT"; then + echo "build-report.sh: fragment '${FRAGMENT}' has no stylesheet sentinel." >&2 + echo " Put exactly this line inside the