diff --git a/eng/skill-validator/src/Check/CheckCommand.cs b/eng/skill-validator/src/Check/CheckCommand.cs index 2dd32ac4e8..c3f52a7c4e 100644 --- a/eng/skill-validator/src/Check/CheckCommand.cs +++ b/eng/skill-validator/src/Check/CheckCommand.cs @@ -14,6 +14,12 @@ public static class CheckCommand ? StringComparer.OrdinalIgnoreCase : StringComparer.Ordinal; + // A skill with `disable-model-invocation: true` in its frontmatter is + // dropped from the Copilot CLI's model-facing skill menu and therefore does + // not consume the skill-menu character budget tracked by + // SkillProfiler.MaxRenderedSkillMenuLength. The flag is parsed once during + // discovery and surfaced on SkillInfo.DisableModelInvocation. + public static Command Create() { var pluginOpt = new Option("--plugin") { Description = "Plugin directories to check (discovers skills, agents, plugin.json)", AllowMultipleArgumentsPerToken = true }; @@ -219,14 +225,26 @@ private static async Task RunPluginCheck(CheckConfig config, CheckReportBui foreach (var (pluginDirectoryPath, skills) in pluginSkills) { - int totalChars = skills.Sum(s => s.Description.Length); - if (totalChars <= SkillProfiler.MaxAggregateDescriptionLength) + // Sum each model-invocable skill's RENDERED menu cost — the full + // block the Copilot CLI emits (name + description + location + // + markup), via SkillProfiler.RenderedSkillMenuCost — so this + // mirrors the real SKILL_CHAR_BUDGET rather than just the raw + // description length. + // + // Skills hidden from the model-facing skill menu via + // `disable-model-invocation: true` do not consume that budget, so + // they are excluded from the aggregate (see + // SkillProfiler.MaxRenderedSkillMenuLength). 
+ int totalChars = skills + .Where(s => !s.DisableModelInvocation) + .Sum(SkillProfiler.RenderedSkillMenuCost); + if (totalChars <= SkillProfiler.MaxRenderedSkillMenuLength) continue; var pluginResult = builder.Plugins.FirstOrDefault(p => string.Equals(p.DirectoryPath, pluginDirectoryPath, s_pathComparison)); var pluginLabel = pluginResult?.Name ?? Path.GetFileName(pluginDirectoryPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)); - var message = $"Plugin '{pluginLabel}' aggregate description size is {totalChars:N0} characters — maximum is {SkillProfiler.MaxAggregateDescriptionLength:N0}."; + var message = $"Plugin '{pluginLabel}' rendered skill-menu size is {totalChars:N0} characters — maximum is {SkillProfiler.MaxRenderedSkillMenuLength:N0}."; if (pluginResult is not null) pluginResult.Errors.Add(message); else diff --git a/eng/skill-validator/src/Check/SkillProfiler.cs b/eng/skill-validator/src/Check/SkillProfiler.cs index c20315fd6a..28be7ad95d 100644 --- a/eng/skill-validator/src/Check/SkillProfiler.cs +++ b/eng/skill-validator/src/Check/SkillProfiler.cs @@ -32,33 +32,55 @@ public static partial class SkillProfiler // vocabularies, but BPE counts are close enough across models for complexity classification. private static readonly Lazy s_bpeTokenizer = new(() => TiktokenTokenizer.CreateForModel("gpt-4")); - // Per-plugin aggregate description size cap. NOTE: this is a local repo - // policy, NOT a documented Copilot/agentskills constraint. The agentskills.io - // specification (https://agentskills.io/specification) defines per-skill - // limits — description (1024 chars, #description-field), compatibility - // (500 chars, #compatibility-field), and name (64 chars, #name-field) — - // but does NOT define any aggregate per-plugin cap. The original 15,000 - // was introduced in #238 / discussed in #222 ("15K characters was - // mentioned, we could choose smaller") as an informal guardrail against - // bloated metadata costs at startup. 
+ // Per-plugin rendered skill-menu budget (NOT a raw description-length sum — + // see RenderedSkillMenuCost and the notes below). This mirrors a REAL GitHub + // Copilot CLI constraint: the CLI renders the model-facing + // skill list under a hard character budget of 15,000 + // (the agent SDK's SKILL_CHAR_BUDGET, default 15e3 — confirmed in CLI + // 1.0.36 and 1.0.61). Skills are listed alphabetically by name and emitted + // WITH their full <description> only until that budget is exhausted; every + // skill past the cut-off collapses to a bare name with NO description and + // therefore can no longer be reliably model-activated. So once a plugin's + // rendered skill-menu footprint approaches ~15K, its alphabetically-later + // skills silently lose their descriptions — and their discoverability — in + // plugin / marketplace contexts. (This is exactly why dotnet-test's + // `run-tests` and `test-*` skills stopped activating in plugin eval runs.) // - // TODO: validate this guardrail against literature (skill-routing studies) - // and run experiments measuring whether large aggregate description footprints - // actually degrade selection accuracy or just cost more tokens up-front. - // Until then, keep the cap aligned with current enforcement as a hard - // validation failure, while leaving enough headroom for reasonable plugin growth. + // History / correction: this was previously documented here as "a local + // repo policy, NOT a documented Copilot/agentskills constraint" and the cap + // was raised 15,000 -> 20,000 -> 22,000 to admit plugin growth. That masked + // the silent menu truncation instead of fixing it. The agentskills.io + // specification (https://agentskills.io/specification) defines only + // per-skill limits — description (1024 chars, #description-field), + // compatibility (500 chars, #compatibility-field), name (64 chars, + // #name-field) — and no aggregate cap, but the CLI's runtime skill-menu + // budget makes 15,000 the effective ceiling regardless. 
// - // Raised 20,000 -> 22,000: the dotnet-test plugin (the largest and most - // active) reached ~20,400 aggregate chars after adding the - // find-untested-sources-polyglot skill, legitimate growth that exceeded the - // previous cap. Bumped to restore ~1.6k headroom rather than degrade the - // routing keywords of existing skills. Prior precedent: 15,000 -> 20,000. - internal const int MaxAggregateDescriptionLength = 22_000; + // Notes: + // * The CLI budget is measured over the fully-rendered blocks + // (name + description + location + markup), NOT the raw descriptions — + // those blocks are ~90-100 chars larger per skill. The aggregate below + // mirrors that rendering via RenderedSkillMenuCost(...) (with XML + // escaping applied), so "passing check" faithfully implies the plugin's + // model-invocable menu stays under the real budget instead of being a + // lenient description-only proxy that could still overflow and silently + // truncate alphabetically-later skills. + // * Skills marked `disable-model-invocation: true` are dropped from the + // CLI menu entirely and do not consume the budget; the aggregate below + // excludes them to match. + internal const int MaxRenderedSkillMenuLength = 15_000; private const int MaxNameLength = 64; internal const int MinDescriptionLength = 10; private const int MaxCompatibilityLength = 500; private const long MaxAssetFileSize = 5 * 1024 * 1024; // 5 MB + // Location label the Copilot CLI renders for each skill in the + // menu. The value is environment-dependent ("project", + // "user", "Custom", ...) but short and roughly constant across skills; we + // use a representative value so RenderedSkillMenuCost models the real + // per-skill footprint. + internal const string SkillMenuLocation = "project"; + public static SkillProfile AnalyzeSkill(SkillInfo skill, CheckOptions? options = null) { var allowRepoTraversal = options?.AllowRepoTraversal ?? 
false; @@ -343,6 +365,32 @@ public static IReadOnlyList FormatDiagnosisHints(SkillProfile profile) ..profile.Warnings.Select(w => $" • {w}")]; } + /// + /// Estimate the number of characters a single skill contributes to the + /// Copilot CLI's model-facing skill menu. This mirrors the runtime rendering + /// in github/copilot-agent-runtime (src/skills/skillToolDescription.ts): each + /// skill is emitted as an XML <skill> block — with XML-escaped + /// name and description and a <location> label — followed by a + /// newline separator. Counting the whole block (not just the description) + /// keeps a conservative proxy for + /// the real 15,000-char SKILL_CHAR_BUDGET, so a plugin that passes the + /// aggregate check cannot silently overflow the menu and truncate + /// alphabetically-later skills. + /// + internal static int RenderedSkillMenuCost(SkillInfo skill) + { + var block = + $"\n {EscapeXml(skill.Name)}\n {EscapeXml(skill.Description)}\n {SkillMenuLocation}\n"; + return block.Length + 1; // +1 for the newline separator between skills + } + + private static string EscapeXml(string s) => + s.Replace("&", "&") + .Replace("<", "<") + .Replace(">", ">") + .Replace("\"", """) + .Replace("'", "'"); + [GeneratedRegex(@"^#{1,4}\s+", RegexOptions.Multiline)] private static partial Regex SectionRegex(); diff --git a/eng/skill-validator/src/Shared/Models.cs b/eng/skill-validator/src/Shared/Models.cs index 1b032c5d13..87623ac954 100644 --- a/eng/skill-validator/src/Shared/Models.cs +++ b/eng/skill-validator/src/Shared/Models.cs @@ -1,3 +1,5 @@ +using YamlDotNet.Serialization; + namespace SkillValidator.Shared; // --- MCP server definition (from plugin.json) --- @@ -18,7 +20,8 @@ public sealed record SkillInfo( string Path, string SkillMdPath, string SkillMdContent, - string? Compatibility = null); + string? Compatibility = null, + bool DisableModelInvocation = false); // --- Agent info --- @@ -48,6 +51,12 @@ public sealed record SkillFrontmatter public string? 
Name { get; set; } public string? Description { get; set; } public string? Compatibility { get; set; } + + // The frontmatter key is hyphenated (`disable-model-invocation`) and does not + // follow the underscore naming convention, so map it explicitly. A skill with + // this set to true is dropped from the Copilot CLI's model-facing skill menu. + [YamlMember(Alias = "disable-model-invocation", ApplyNamingConventions = false)] + public bool DisableModelInvocation { get; set; } } public sealed record AgentFrontmatter diff --git a/eng/skill-validator/src/Shared/SkillDiscovery.cs b/eng/skill-validator/src/Shared/SkillDiscovery.cs index 7916f36714..cba60c1a61 100644 --- a/eng/skill-validator/src/Shared/SkillDiscovery.cs +++ b/eng/skill-validator/src/Shared/SkillDiscovery.cs @@ -77,7 +77,8 @@ public static async Task> DiscoverSkillsRecursive(strin Path: dirPath, SkillMdPath: skillMdPath, SkillMdContent: skillMdContent, - Compatibility: compatibility); + Compatibility: compatibility, + DisableModelInvocation: metadata.DisableModelInvocation); } internal static (SkillFrontmatter Metadata, string Body) ParseFrontmatter(string content) diff --git a/eng/skill-validator/src/docs/InvestigatingResults.md b/eng/skill-validator/src/docs/InvestigatingResults.md index cdc4e7e8ef..685fae5079 100644 --- a/eng/skill-validator/src/docs/InvestigatingResults.md +++ b/eng/skill-validator/src/docs/InvestigatingResults.md @@ -216,6 +216,37 @@ Several scenario-level options in `eval.yaml` are relevant when diagnosing failu - Make sure the description includes keywords from the scenario - Check the scenario itself has sufficient information that the agent can reason that it needs the skill. (It should not cheat and suggest the skill.) 
+> **Plugin-arm-only non-activation (skill-menu budget overflow).** If a skill +> activates reliably in the **isolated** arm but consistently fails to activate +> in the **plugin** arm (`skillActivationIsolated.activated: true` but +> `skillActivationPlugin.activated: false`, with empty `detectedSkills`), the +> cause is usually *not* the description text — it may never be shown. The +> Copilot CLI renders the model-facing skill menu under a hard +> **15,000-character budget** (the agent SDK's `SKILL_CHAR_BUDGET`, default +> `15e3`). Skills are listed **alphabetically by name** and emitted with their +> full `<description>` only until the budget is exhausted; every skill past the +> cut-off collapses to a **bare name with no description** and can no longer be +> reliably model-activated. In a large plugin, an alphabetically-late skill +> (e.g. `run-tests`, `test-*`) silently loses its description in the plugin +> menu even though it is fine in isolation. +> +> Fixes for this case (description tuning will *not* help — the text is not in +> the menu): +> - Mark reference / agent-orchestrated skills that are never meant to be +> model-invoked from a user prompt with `disable-model-invocation: true`. +> The CLI drops them from the menu entirely, freeing budget for the skills +> that should be discoverable. (They remain invocable by explicit name.) +> - Reduce the plugin's aggregate skill-menu footprint so its model-invocable +> skills fit under the budget. The `check` command enforces this via +> `SkillProfiler.MaxRenderedSkillMenuLength` (15,000), summing each +> model-invocable skill's **rendered `<skill>` block** (name + description + +> location + markup, via `SkillProfiler.RenderedSkillMenuCost`) — not just the +> raw description — and counting only skills *without* +> `disable-model-invocation: true`. Counting the rendered block makes passing +> `check` a faithful proxy for "fits in the real CLI menu budget". 
+> - As a last resort, consolidate overlapping skills so the plugin exposes +> fewer model-invocable entries. + ### 6. Rubric penalizes valid alternatives **Symptoms:** diff --git a/eng/skill-validator/tests/Check/CheckCommandTests.cs b/eng/skill-validator/tests/Check/CheckCommandTests.cs index 13f6b6dec2..6a71dfe483 100644 --- a/eng/skill-validator/tests/Check/CheckCommandTests.cs +++ b/eng/skill-validator/tests/Check/CheckCommandTests.cs @@ -50,10 +50,32 @@ public async Task UnderAggregateLimit_Passes() } [Fact] - public async Task AtAggregateLimit_Passes() + public void RenderedSkillMenuCost_CountsEscapedNameDescriptionLocationAndMarkup() { - // Create skills whose descriptions sum exactly to the limit, each under per-skill max (1024) - int limit = SkillProfiler.MaxAggregateDescriptionLength; + var skill = new SkillInfo( + Name: "my-skill", + Description: "Tom & Jerry ", + Path: "", + SkillMdPath: "", + SkillMdContent: ""); + + // Mirrors github/copilot-agent-runtime skillToolDescription.ts: the full + // block (XML-escaped name + description, plus location/markup) + // followed by a single newline separator. + string expectedBlock = + $"\n my-skill\n Tom & Jerry <tag>\n {SkillProfiler.SkillMenuLocation}\n"; + + Assert.Equal(expectedBlock.Length + 1, SkillProfiler.RenderedSkillMenuCost(skill)); + } + + [Fact] + public async Task DescriptionsSummingToLimit_Fails_BecauseRenderedOverheadIsCounted() + { + // Descriptions ALONE sum to exactly the cap. The previous check (which + // counted only Description.Length) treated this as "at limit → pass", + // but the real CLI budget also includes each skill's name, location and + // markup, so the rendered total exceeds the cap and must fail. 
+ int limit = SkillProfiler.MaxRenderedSkillMenuLength; int perSkill = 1024; int skillCount = limit / perSkill; int remainder = limit - (skillCount * perSkill); @@ -69,7 +91,7 @@ public async Task AtAggregateLimit_Passes() { var config = new CheckConfig { PluginPaths = [Path.Combine(root, "test-plugin")] }; var result = await CheckCommand.Run(config); - Assert.Equal(0, result); + Assert.Equal(1, result); } finally { Directory.Delete(root, true); } } @@ -77,7 +99,7 @@ public async Task AtAggregateLimit_Passes() [Fact] public async Task OverAggregateLimit_Fails() { - int limit = SkillProfiler.MaxAggregateDescriptionLength; + int limit = SkillProfiler.MaxRenderedSkillMenuLength; int perSkill = 1024; // Enough skills to exceed the aggregate limit int skillCount = (limit / perSkill) + 1; diff --git a/eng/skill-validator/tests/Shared/DiscoveryTests.cs b/eng/skill-validator/tests/Shared/DiscoveryTests.cs index 43479e685c..b5133698dd 100644 --- a/eng/skill-validator/tests/Shared/DiscoveryTests.cs +++ b/eng/skill-validator/tests/Shared/DiscoveryTests.cs @@ -82,6 +82,44 @@ public async Task DiscoverSkillsRecursiveReturnsEmptyForMissingDir() } } +public class ParseFrontmatterTests +{ + [Fact] + public void DisableModelInvocation_True_WhenTopLevelKeySet() + { + var content = "---\nname: my-skill\ndescription: A skill.\ndisable-model-invocation: true\n---\nBody"; + var (metadata, _) = SkillDiscovery.ParseFrontmatter(content); + Assert.True(metadata.DisableModelInvocation); + } + + [Fact] + public void DisableModelInvocation_False_WhenKeyAbsent() + { + var content = "---\nname: my-skill\ndescription: A skill.\n---\nBody"; + var (metadata, _) = SkillDiscovery.ParseFrontmatter(content); + Assert.False(metadata.DisableModelInvocation); + } + + [Fact] + public void DisableModelInvocation_False_WhenKeyAppearsInsideBlockScalarDescription() + { + // Regression: a previous regex-based check matched any line in the YAML, + // so a block-scalar description that merely mentions the key on 
its own + // line was wrongly treated as disabling model invocation. Proper YAML + // parsing must not be fooled by indented block-scalar content. + var content = + "---\n" + + "name: my-skill\n" + + "description: |\n" + + " This skill explains config options.\n" + + " disable-model-invocation: true\n" + + "---\n" + + "Body"; + var (metadata, _) = SkillDiscovery.ParseFrontmatter(content); + Assert.False(metadata.DisableModelInvocation); + } +} + public class PluginDiscoveryTests { [Fact] diff --git a/plugins/dotnet-test/skills/assertion-quality/SKILL.md b/plugins/dotnet-test/skills/assertion-quality/SKILL.md index f1235d8160..6f9c6ad7f9 100644 --- a/plugins/dotnet-test/skills/assertion-quality/SKILL.md +++ b/plugins/dotnet-test/skills/assertion-quality/SKILL.md @@ -1,6 +1,6 @@ --- name: assertion-quality -description: "Analyzes the variety and depth of assertions across test suites in any language. Use when the user asks to evaluate assertion quality, find shallow tests, identify assertion-free tests (no assertions or only trivial ones like Assert.IsNotNull / toBeTruthy() / assert x is not None), flag self-referential or tautological assertions (output equals input on round-trip operations), measure assertion diversity, or audit whether tests verify different facets of behavior. Polyglot: .NET, Python (pytest), TS/JS (Jest/Vitest), Java, Go, Ruby, Rust, Swift, Kotlin, PowerShell, C++. DO NOT USE FOR: writing new tests (use code-testing-agent / writing-mstest-tests), checking whether tests would catch a bug if code changed (mutation reasoning — use test-gap-analysis), anti-patterns like flakiness or duplication, or a general severity-ranked anti-pattern audit even when focused on self-referential / tautological assertions and not asking for assertion-diversity metrics (use test-anti-patterns); fixing assertions." +description: "Analyzes the variety and depth of assertions across test suites in any language. 
Use when the user asks to evaluate assertion quality, find shallow tests, identify assertion-free tests (no assertions or only trivial ones like Assert.IsNotNull / toBeTruthy()), flag self-referential or tautological assertions, measure assertion diversity, or audit whether tests verify different facets of behavior. Polyglot: .NET, Python, TS/JS, Java, Go, Ruby, Rust, Swift, Kotlin, PowerShell, C++. DO NOT USE FOR: writing new tests (use code-testing-agent / writing-mstest-tests), mutation reasoning about whether tests would catch a bug (use test-gap-analysis), or a general severity-ranked anti-pattern audit (use test-anti-patterns); fixing assertions." license: MIT --- diff --git a/plugins/dotnet-test/skills/coverage-analysis/SKILL.md b/plugins/dotnet-test/skills/coverage-analysis/SKILL.md index 2193c48e29..a6de1e03ad 100644 --- a/plugins/dotnet-test/skills/coverage-analysis/SKILL.md +++ b/plugins/dotnet-test/skills/coverage-analysis/SKILL.md @@ -9,13 +9,10 @@ description: > USE FOR: coverage stuck, coverage plateau, can't increase coverage, what's blocking coverage, coverage gap, CRAP scores, risk hotspots, where to add tests, coverage analysis, coverage report. - DO NOT USE FOR: targeted single-method CRAP analysis (use crap-score), - auditing test code for the "coverage-touching" anti-pattern (tests that - execute / call code but assert nothing, inflating coverage without - verifying behavior) — that is a test-code quality audit, use - test-anti-patterns; writing tests; running tests without coverage, or - troubleshooting test execution (use run-tests). This skill requires or - produces coverage (Cobertura) and CRAP metrics. + DO NOT USE FOR: targeted single-method CRAP analysis (use crap-score); + auditing test code for coverage-touching or other anti-patterns (use + test-anti-patterns); writing tests; running tests (use run-tests). Requires + or produces coverage (Cobertura) and CRAP metrics. 
license: MIT --- diff --git a/plugins/dotnet-test/skills/find-untested-sources-polyglot/SKILL.md b/plugins/dotnet-test/skills/find-untested-sources-polyglot/SKILL.md deleted file mode 100644 index f51251ce24..0000000000 --- a/plugins/dotnet-test/skills/find-untested-sources-polyglot/SKILL.md +++ /dev/null @@ -1,205 +0,0 @@ ---- -name: find-untested-sources-polyglot -description: > - Polyglot, parse-only static analysis that pairs source files with - referencing tests across Python, TypeScript/JavaScript, Go, Java, Rust, - C#, and Ruby. JSON shape matches `find-untested-sources`. - USE FOR: where to write tests next, find untested files, list sources - without tests, polyglot test-pairing map. - DO NOT USE FOR: coverage, CRAP risk. For .NET-only repos prefer - `find-untested-sources`. -disable-model-invocation: true -license: MIT ---- - -# Find Untested Sources (Polyglot) - -## Purpose - -Coverage tools answer "which lines were executed?" — they require a green build -and a passing test run, which is minutes-to-tens-of-minutes on a real repo. -The question this skill answers is different and much cheaper: - -> _Which source files have no test file referencing any of their declared -> symbols?_ - -That's the question an agent asks **before** writing a new test — and it can be -answered statically in a few seconds by parsing every recognized source file -with [tree-sitter](https://tree-sitter.github.io/), with **no build, no -dependency resolution, no compilation**. - -This is the polyglot sibling of the C# `find-untested-sources` skill. The -output schema is intentionally compatible so the same prompt patterns can -consume either tool. - -## When to Use - -- The repository is not exclusively C#, or you want a tool that works - uniformly across multiple languages without per-language plumbing. -- User asks "where should I add tests?", "which files have no tests?", "find - untested code", "give me a test gap list", "what's the next file to test". 
-- Before invoking a test-generation agent, to produce a prioritized worklist. -- After generating tests, to verify each new test file pairs to a source file. - -## When Not to Use - -- **C#-only repo** — prefer `find-untested-sources`. Its Roslyn-based - namespace disambiguation is strictly better than this skill's identifier - overlap on duplicated short names like `Settings` or `Context`. -- **Line/branch coverage** — use language-native coverage tooling. -- **Are existing tests strong?** — use `test-gap-analysis` or - `assertion-quality`. - -## Inputs - -| Input | Required | Default | Description | -|-------|----------|---------|-------------| -| Repo root | Yes | — | Directory to scan recursively. | -| `--lang LANG` | No | all | Restrict to a language (repeatable). One of `python`, `typescript`, `tsx`, `javascript`, `go`, `java`, `rust`, `csharp`, `ruby`. | -| `--limit-untested N` | No | 0 (no limit) | Truncate the untested list to N entries. | -| `--include-tested` | No | off | Include `tested_sources` in the payload (large). | - -### Prerequisites - -- Python 3.10+. -- `pip install tree-sitter-language-pack` (single self-contained wheel that - bundles parsers for 300+ languages and the high-level `process()` API used - here). No native build, no per-language grammar install. - -## Usage - -```powershell -# From the skill folder -python scripts/find_untested_sources.py - -# Restrict to a language -python scripts/find_untested_sources.py --lang python --lang typescript - -# Truncate the report (top 20 by declared API surface) -python scripts/find_untested_sources.py --limit-untested 20 > pairing.json - -# Iterate, highest-API-surface first -$report = Get-Content pairing.json | ConvertFrom-Json -$report.untested_sources | Select-Object -First 10 path, declaration_count, suggested_test_path -``` - -Diagnostics go to stderr; JSON goes to stdout. 
- -## Output Schema - -```jsonc -{ - "repo_root": "", - "summary": { - "source_files": 3138, - "test_files": 761, - "tested_source_files": 1419, - "untested_source_files": 1719, - "orphan_test_files": 15, - "languages": ["csharp"] - }, - "untested_sources": [ - { - "path": "src/Foo/Bar.cs", - "language": "csharp", - "declaration_count": 8, - "declarations": ["Bar", "BarOptions", "IBar", "..."], - "suggested_test_path": "src/Foo/BarTests.cs" - } - ], - "orphan_tests": [ - { "path": "tests/SomeIntegrationTest.cs", "language": "csharp" } - ] -} -``` - -Pass `--include-tested` to additionally emit `tested_sources` (same shape as -`untested_sources` but with a `covering_tests` array instead of a suggested -path). Omitted by default to keep the payload small for LLM consumption. - -## How It Works - -1. **File discovery** — recursive directory walk pruning common build/vendor - dirs (`bin`, `obj`, `node_modules`, `target`, `dist`, `build`, `vendor`, - `__pycache__`, `.venv`, `.git`, etc.). Skips generated files (`.d.ts`, - `.g.cs`, `.Designer.cs`, `_pb2.py`, `*.min.js`, `AssemblyInfo.cs`, ...). - -2. **Language detection** — `tree_sitter_language_pack.detect_language_from_path` - maps the extension to one of the supported languages. Unknown extensions - are skipped silently. - -3. **Test-vs-source classification** — per-language path heuristics: - - | Language | Test rule | - |---|---| - | Python | path contains `tests/` or `test/`; or filename starts with `test_` or ends with `_test.py`; or `conftest.py`. | - | JS/TS/TSX | path contains `__tests__`, `tests`, `test`, `spec`, or `e2e`; or filename contains `.test.` or `.spec.`. | - | Go | filename ends with `_test.go` (Go's standard convention). | - | Java | path contains `test` or `tests`; or filename ends with `Test.java` / `Tests.java`. | - | Rust | path contains `tests/` or `benches/`. 
| - | C# | path contains `tests/`; or project segment ends with `.Tests`, `.Test`, `.UnitTests`, `.IntegrationTests`; or filename ends with `Tests`/`Test`. | - | Ruby | path contains `spec/`, `test/`; or filename ends with `_spec.rb` / `_test.rb`. | - -4. **Per-file extraction** — `tree_sitter_language_pack.process(text, - ProcessConfig(language=lang, structure=True, imports=True, symbols=True))` - returns: - - `structure` — top-level declared items (functions, classes, methods, - traits, ...) with their names. Used as the declared-symbol set. - - `imports` — raw import statements (e.g. `from foo import bar`, - `import "pkg/util"`, `using System.IO;`, `use crate::foo::Bar;`). - - `symbols` — flat declared-name list, unioned with `structure` (acts as - a fallback when `structure` is empty, and broadens coverage when both - are populated; declaration counts may exceed pure structure parsing). - -5. **Pairing** — for each test file, union the results of: - - **Import resolution** (per language): - - Python: `from pkg.mod import x` → `pkg/mod.py` or - `pkg/mod/__init__.py`. - - TS/JS: relative `./foo` / `../bar` → with `.ts`/`.tsx`/`.js`/`.jsx` - and `/index.*` candidate paths. - - Go: `"path/to/pkg"` → any source file whose final path segment - matches `pkg.go` in the index. - - Java: `import a.b.C;` → `a/b/C.java`. - - Rust: `use a::b::C;` → `b.rs` or `C.rs` (best-effort, no module tree). - - Ruby: `require 'foo/bar'` → `foo/bar.rb`. - - C#: `using` maps to namespaces, not files; intentionally a no-op — - falls through to identifier overlap below. - - **Identifier overlap** — every word-like token in the test source is - looked up in the source index of declared names (length ≥ 4 to keep - noise down). Any source whose declared name appears as a token in the - same-language test is paired. - -6. **JSON emit** — `untested_sources` is ordered by declaration count - descending so the highest-API-surface gap appears first. 
- -## Limitations - -This is a static, parse-only heuristic. It deliberately trades a small amount -of accuracy for orders-of-magnitude lower cost than coverage. Known gaps: - -- **Reflection / DI-resolved types** that a test only references through a - string name or container resolution don't appear in the identifier scan. -- **C#** specifically: namespace disambiguation is the C# tool's strength; - this polyglot version intentionally skips it. If you have a .NET-only - repository, prefer the Roslyn-based `find-untested-sources`. -- **Short identifier names** (< 4 chars) are dropped from the overlap index - to avoid noisy pairings on names like `id`, `db`, `Tag`. -- **Cross-language tests** (Python tests driving a Go binary, etc.) are - recorded as orphan tests since same-language pairing is the rule. -- **Monorepo path aliases** (TS path mapping, Java module-info) are not - resolved; the suffix-match fallback may pick the wrong source if two files - share a trailing path segment in different sub-projects. - -For these cases, run actual coverage on the unpaired candidates the agent -has already triaged. - -## Outputs the agent should consume - -- `untested_sources[*].path` — pick the next source file to test (highest - `declaration_count` first). -- `untested_sources[*].suggested_test_path` — drop-in target for the new - test file using the per-language convention. -- (With `--include-tested`) `tested_sources[*].covering_tests` — verify a - newly written test file lands in the list for the intended source. -- `orphan_tests` — tests that don't appear to reference any same-language - source file; useful for triaging stale tests or integration-only tests. 
diff --git a/plugins/dotnet-test/skills/find-untested-sources/SKILL.md b/plugins/dotnet-test/skills/find-untested-sources/SKILL.md index 4b722fd1ca..14cd1420d1 100644 --- a/plugins/dotnet-test/skills/find-untested-sources/SKILL.md +++ b/plugins/dotnet-test/skills/find-untested-sources/SKILL.md @@ -1,14 +1,14 @@ --- name: find-untested-sources description: > - Parse-only C# analysis that pairs source files with referencing tests and - emits JSON: `source_to_tests`, `untested` ordered by declaration count, and - `suggested_test_path` from `ProjectReference` edges. - USE FOR: where to write tests next, find untested files, list sources - without tests, build a test-pairing map. - DO NOT USE FOR: coverage (use `coverage-analysis`), CRAP risk ranking, - assertion gaps. -disable-model-invocation: true + Parse-only static analysis that pairs source files with the tests referencing + them and emits JSON listing untested files ordered by API surface, each with a + suggested_test_path. Roslyn engine for C#/.NET (namespace-aware), tree-sitter + engine for polyglot repos (Python, TS/JS, Go, Java, Rust, Ruby). + USE FOR: where to write tests next, which files have no tests, find untested + code, build a source-to-test pairing map, prioritized test-gap worklist. + DO NOT USE FOR: line/branch coverage or CRAP risk (use coverage-analysis); + whether existing tests are strong (use test-gap-analysis or assertion-quality). license: MIT --- @@ -17,16 +17,29 @@ license: MIT ## Purpose Coverage tools answer "which lines were executed?" — they require a green build -and a passing test run, which is minutes-to-tens-of-minutes on a real repo. -The question this skill answers is different and much cheaper: +and a passing test run, which is minutes-to-tens-of-minutes on a real repo. 
The +question this skill answers is different and much cheaper: -> _Which C# source files have no test file referencing any of their declared types?_ +> _Which source files have no test file referencing any of their declared +> types/symbols?_ That's the question an agent asks **before** writing a new test — and it can be -answered statically in a few seconds by parsing every `.cs` file with the -Roslyn syntax API, with **no `Compilation`, no `MetadataReference`, and no -binding**. The output is a deterministic test-pairing map that lets the agent -pick the next file to test without reading the entire codebase first. +answered statically in a few seconds by parsing source files, with **no build, +no dependency resolution, and no compilation**. The output is a deterministic +test-pairing map that lets the agent pick the next file to test without reading +the entire codebase first. + +## Two engines — pick one + +This skill ships two interchangeable analyzers with a compatible JSON contract: + +| Engine | Script | Use when | +|--------|--------|----------| +| **Roslyn (C#)** | `scripts/Find-UntestedSources.cs` | The repo is **.NET-only**. Parses every `.cs` file with the Roslyn syntax API and does strict **namespace disambiguation**, so it is materially more accurate on duplicated short names like `Settings` or `Context`. | +| **tree-sitter (polyglot)** | `scripts/find_untested_sources.py` | The repo is **not exclusively C#**, or you want one tool across Python, TypeScript/JavaScript, Go, Java, Rust, Ruby, and C#. | + +For a .NET-only repository, **prefer the Roslyn engine** — its namespace-aware +pairing beats the polyglot engine's identifier overlap. ## When to Use @@ -34,7 +47,7 @@ pick the next file to test without reading the entire codebase first. untested code", "give me a test gap list", "what's the next file to test". - Before invoking a test-generation agent, to produce a prioritized worklist. 
- After generating tests, to verify each new test file pairs to a source file. -- To enumerate "weakly paired" source files (only one referring test file) for +- To enumerate "weakly paired" source files (only one referring test) for follow-up depth checks. ## When Not to Use @@ -43,23 +56,17 @@ pick the next file to test without reading the entire codebase first. - **CRAP-score / risk hotspots** — use `coverage-analysis`. - **Are existing tests strong?** — use `test-gap-analysis` (mutation reasoning) or `assertion-quality`. -- **Tests for non-C# code** — this prototype is C#-only. - -## Inputs -| Input | Required | Default | Description | -|-------|----------|---------|-------------| -| Repo root | Yes | — | Directory to scan recursively for `.cs` files. | -| `--top N` | No | all | Truncate the `untested` list to the top N entries by declaration count. | +## Roslyn engine (C#) ### Prerequisites -- .NET SDK that supports file-based apps (`dotnet run script.cs`). Pinned in - the repo's `global.json` (SDK 11 preview or later). +- .NET SDK that supports file-based apps (`dotnet run script.cs`). Pinned in the + repo's `global.json` (SDK 11 preview or later). - No internet access required beyond the initial NuGet restore of `Microsoft.CodeAnalysis.CSharp` on first run. -## Usage +### Usage ```powershell # From the skill folder @@ -75,7 +82,7 @@ $report.untested | Select-Object -First 10 source, decl_count, suggested_test_pa Diagnostics go to stderr; JSON goes to stdout. -## Output Schema +### Output schema ```jsonc { @@ -104,68 +111,149 @@ Diagnostics go to stderr; JSON goes to stdout. } ``` -## How It Works - -1. **File discovery** — recursive directory walk pruning `bin/`, `obj/`, - `node_modules/`, `.git/`, `.vs/`, `packages/`, and any dotted subdir. - Skips generated files (`.g.cs`, `.Designer.cs`, `.AssemblyInfo.cs`). +### How it works +1. 
**File discovery** — recursive walk pruning `bin/`, `obj/`, `node_modules/`, + `.git/`, `.vs/`, `packages/`, and any dotted subdir. Skips generated files + (`.g.cs`, `.Designer.cs`, `.AssemblyInfo.cs`). 2. **Test vs source classification** — walks up to the nearest `.csproj` and - marks it as a test project if (a) the project name ends in `.Tests`, - `.Test`, `.UnitTests`, `.IntegrationTests`, `.E2E`, `.EndToEnd`, `.Spec`, - `.Specs`, or (b) the file content references `Microsoft.NET.Test.Sdk`, - `MSTest.Sdk`, `Microsoft.Testing.Platform`, `xunit`, `NUnit`, `TUnit`, or + marks it a test project if the project name ends in `.Tests`, `.Test`, + `.UnitTests`, `.IntegrationTests`, `.E2E`, `.EndToEnd`, `.Spec`, `.Specs`, or + the content references `Microsoft.NET.Test.Sdk`, `MSTest.Sdk`, + `Microsoft.Testing.Platform`, `xunit`, `NUnit`, `TUnit`, or `true`. - -3. **Source index (parallel)** — for each source file, parse with - `CSharpSyntaxTree.ParseText` (syntax only, no compilation). Walk every - `BaseTypeDeclarationSyntax` and `DelegateDeclarationSyntax` and record +3. **Source index (parallel)** — parse each source file with + `CSharpSyntaxTree.ParseText` (syntax only, no compilation); record every + `BaseTypeDeclarationSyntax` / `DelegateDeclarationSyntax` as `(ShortName, EnclosingNamespace, FilePath)`. +4. **Test scan (parallel)** — parse each test file, collect `using` directives + + enclosing namespace, walk every `IdentifierToken`, look it up in the + short-name index, and **disambiguate strictly**: an identifier is attributed + only if the declaration's namespace matches one of the test file's `using` + directives, the enclosing namespace, or a prefix of them. This avoids noise + where common names like `Settings` or `Context` match every project. +5. **Pairing & suggestion** — invert into `source → [tests]`. 
Build a + production-to-test project map from `<ProjectReference>` entries; for each + untested source, mirror its in-project relative path under the referencing + test project to suggest a path. +6. **JSON emit** — ordered by declaration count desc, then alphabetical. -4. **Test scan (parallel)** — for each test file, parse, collect `using` - directives + enclosing namespace, walk every `IdentifierToken`, look it up - in the short-name index, and **disambiguate strictly**: an identifier is - attributed to a declaration only if the declaration's namespace matches one - of the test file's `using` directives, the enclosing namespace, or a - prefix of them. Identifiers that don't resolve under that constraint are - dropped (avoids the noise where common names like `Settings` or `Context` - would otherwise match every project that happens to declare them). +## Polyglot engine (tree-sitter) -5. **Pairing & suggestion** — invert into `source → [tests]`. Build a - production-to-test project map from `<ProjectReference>` entries in test - `.csproj` files; for each untested source, mirror its in-project relative - path under the referencing test project and append `Tests.cs` to suggest a - path. +### Prerequisites -6. **JSON emit** — ordered by declaration count desc, then alphabetical. +- Python 3.10+. +- `pip install tree-sitter-language-pack` (single self-contained wheel that + bundles parsers for 300+ languages and the high-level `process()` API). No + native build, no per-language grammar install. 
+ +### Usage + +```powershell +# From the skill folder +python scripts/find_untested_sources.py + +# Restrict to a language (repeatable) +python scripts/find_untested_sources.py --lang python --lang typescript + +# Truncate the report (top 20 by declared API surface) +python scripts/find_untested_sources.py --limit-untested 20 > pairing.json + +# Iterate, highest-API-surface first +$report = Get-Content pairing.json | ConvertFrom-Json +$report.untested_sources | Select-Object -First 10 path, declaration_count, suggested_test_path +``` + +Pass `--include-tested` to additionally emit `tested_sources` (omitted by +default to keep the payload small for LLM consumption). Diagnostics go to +stderr; JSON goes to stdout. + +### Output schema + +```jsonc +{ + "repo_root": "", + "summary": { + "source_files": 3138, + "test_files": 761, + "tested_source_files": 1419, + "untested_source_files": 1719, + "orphan_test_files": 15, + "languages": ["csharp"] + }, + "untested_sources": [ + { + "path": "src/Foo/Bar.cs", + "language": "csharp", + "declaration_count": 8, + "declarations": ["Bar", "BarOptions", "IBar", "..."], + "suggested_test_path": "src/Foo/BarTests.cs" + } + ], + "orphan_tests": [ + { "path": "tests/SomeIntegrationTest.cs", "language": "csharp" } + ] +} +``` + +### How it works + +1. **File discovery** — recursive walk pruning common build/vendor dirs (`bin`, + `obj`, `node_modules`, `target`, `dist`, `build`, `vendor`, `__pycache__`, + `.venv`, `.git`, …) and generated files (`.d.ts`, `.g.cs`, `.Designer.cs`, + `_pb2.py`, `*.min.js`, `AssemblyInfo.cs`, …). +2. **Language detection** — `detect_language_from_path` maps the extension to a + supported language; unknown extensions are skipped. +3. **Test-vs-source classification** — per-language path heuristics: + + | Language | Test rule | + |---|---| + | Python | path contains `tests/`/`test/`; or filename starts with `test_` or ends `_test.py`; or `conftest.py`. 
| + | JS/TS/TSX | path contains `__tests__`, `tests`, `test`, `spec`, `e2e`; or filename contains `.test.`/`.spec.`. | + | Go | filename ends `_test.go`. | + | Java | path contains `test`/`tests`; or filename ends `Test.java`/`Tests.java`. | + | Rust | path contains `tests/`/`benches/`. | + | C# | path contains `tests/`; or project segment ends `.Tests`/`.Test`/`.UnitTests`/`.IntegrationTests`; or filename ends `Tests`/`Test`. | + | Ruby | path contains `spec/`/`test/`; or filename ends `_spec.rb`/`_test.rb`. | + +4. **Per-file extraction** — `process(text, ProcessConfig(structure, imports, + symbols))` returns declared items, raw import statements, and a flat + declared-name list. +5. **Pairing** — for each test file, union **import resolution** (per language, + e.g. Python `from pkg.mod import x` → `pkg/mod.py`; Java `import a.b.C;` → + `a/b/C.java`; C# `using` is namespace-not-file, so a no-op) with **identifier + overlap** (word-like tokens, length ≥ 4, matched against declared names). +6. **JSON emit** — `untested_sources` ordered by declaration count descending. ## Limitations (be honest with the agent) -This is a static, parse-only heuristic. It deliberately trades a small amount -of accuracy for orders-of-magnitude lower cost than coverage. Known gaps: - -- **Reflection-driven tests** that exercise a type only via - `Type.GetType(...)` / `Activator.CreateInstance` won't be detected — the - type's short name never appears in the test source. -- **DI-resolved types** referenced only by `IServiceProvider.GetRequiredService<T>()` - where `T` is an interface and the implementation isn't named in the test. -- **Extension methods** invoked as instance methods. The extension class is - not named, only the method, so the source file declaring the static class - is not credited. -- **`var`, target-typed `new()`, and pattern matching** lose the type token; - the file-level union usually still catches it through other references. 
-- **Cross-language**: any source file driven by JSON/YAML test fixtures, code - generators, or compiled-only references is not detected. +Both engines are static, parse-only heuristics that trade a little accuracy for +orders-of-magnitude lower cost than coverage. Known gaps: + +- **Reflection / DI-resolved types** referenced only via a string name or + container resolution won't be detected — the type's short name never appears + in the test source. +- **Extension methods** invoked as instance methods (C#): the declaring static + class is not named, so its file is not credited. +- **`var`, target-typed `new()`, pattern matching** lose the type token; the + file-level union usually still catches it through other references. +- **Short identifier names** (polyglot, < 4 chars) are dropped to avoid noisy + pairings on names like `id`, `db`, `Tag`. +- **Monorepo path aliases** (TS path mapping, Java module-info) are not + resolved; a suffix-match fallback may pick the wrong source if two files share + a trailing path segment. For these cases, run actual coverage (`coverage-analysis`) on the unpaired candidates the agent has already triaged. ## Outputs the agent should consume -- `untested[*].source` — pick the next source file to test (highest - `decl_count` first). -- `untested[*].suggested_test_path` — drop-in target for the new test file; - honors the test project that already `<ProjectReference>`s the source's +- `untested[*].source` / `untested_sources[*].path` — pick the next source file + to test (highest declaration count first). +- `*.suggested_test_path` — drop-in target for the new test file; the Roslyn + engine honors the test project that already `<ProjectReference>`s the source's project, so `dotnet sln add` is not needed. -- `source_to_tests` — verify a newly written test file lands in the list for - the intended source. +- `source_to_tests` (Roslyn) / `--include-tested` `tested_sources` (polyglot) — + verify a newly written test file lands in the list for the intended source. 
+- `orphan_tests` (polyglot) — tests that don't reference any same-language + source file; useful for triaging stale or integration-only tests. diff --git a/plugins/dotnet-test/skills/find-untested-sources-polyglot/scripts/find_untested_sources.py b/plugins/dotnet-test/skills/find-untested-sources/scripts/find_untested_sources.py similarity index 100% rename from plugins/dotnet-test/skills/find-untested-sources-polyglot/scripts/find_untested_sources.py rename to plugins/dotnet-test/skills/find-untested-sources/scripts/find_untested_sources.py diff --git a/plugins/dotnet-test/skills/grade-tests/SKILL.md b/plugins/dotnet-test/skills/grade-tests/SKILL.md index 95ea216bb9..be5533d0d8 100644 --- a/plugins/dotnet-test/skills/grade-tests/SKILL.md +++ b/plugins/dotnet-test/skills/grade-tests/SKILL.md @@ -6,12 +6,10 @@ description: > score band, and a one-line note — designed to be posted as a PR comment. Use when the caller wants per-test feedback on a curated list of methods (for example, the new or modified tests in a pull request), not a - suite-wide audit. Polyglot: .NET (MSTest/xUnit/NUnit/TUnit), Python - (pytest/unittest), TS/JS (Jest/Vitest/Mocha/node:test), Java (JUnit/TestNG), - Go, Ruby (RSpec/Minitest), Rust, Swift (XCTest/Swift Testing), Kotlin - (JUnit/Kotest), PowerShell (Pester), C++ (GoogleTest/Catch2/doctest). - Input is a list of test methods (or method bodies / file+line spans); - output is a compact markdown table plus a short summary. DO NOT USE FOR: + suite-wide audit. Polyglot: .NET, Python, TS/JS, Java, Go, Ruby, Rust, + Swift, Kotlin, PowerShell, C++. Input is a list of test methods (or method + bodies / file+line spans); output is a compact markdown table plus a short + summary. DO NOT USE FOR: full suite audits (use test-quality-auditor agent or test-anti-patterns), writing new tests (use code-testing-generator agent or writing-mstest-tests), fixing failures, or measuring code coverage. 
diff --git a/plugins/dotnet-test/skills/migrate-static-to-wrapper/SKILL.md b/plugins/dotnet-test/skills/migrate-static-to-wrapper/SKILL.md index 98add4909a..9234863cb0 100644 --- a/plugins/dotnet-test/skills/migrate-static-to-wrapper/SKILL.md +++ b/plugins/dotnet-test/skills/migrate-static-to-wrapper/SKILL.md @@ -6,12 +6,11 @@ description: > Performs codemod-style bulk replacement of DateTime.UtcNow to TimeProvider.GetUtcNow(), File.ReadAllText to IFileSystem, and similar transformations. Adds constructor injection parameters and updates DI registration. - USE FOR: replace DateTime.UtcNow with TimeProvider, replace DateTime.Now with - TimeProvider, migrate static calls to wrapper, bulk replace File.* with IFileSystem, - codemod static to injectable, add constructor injection for time provider, - mechanical migration of statics, refactor DateTime to TimeProvider, swap static - for injected dependency, convert static calls to use abstraction, replace statics - in a class, migrate one file to TimeProvider, scoped migration, update call sites. + USE FOR: replace DateTime.Now/UtcNow with TimeProvider, migrate static calls + to wrapper, bulk replace File.* with IFileSystem, codemod static to + injectable, add constructor injection for a dependency, mechanical or scoped + migration of statics, convert static calls to use an abstraction, update call + sites. DO NOT USE FOR: detecting statics (use detect-static-dependencies), generating wrappers (use generate-testability-wrappers), migrating between test frameworks. license: MIT diff --git a/plugins/dotnet-test/skills/test-anti-patterns/SKILL.md b/plugins/dotnet-test/skills/test-anti-patterns/SKILL.md index a3c90b6df6..e262a89b1d 100644 --- a/plugins/dotnet-test/skills/test-anti-patterns/SKILL.md +++ b/plugins/dotnet-test/skills/test-anti-patterns/SKILL.md @@ -5,18 +5,15 @@ description: > and quality issues — produces a severity-ranked report (Critical/Warning/Info). 
INVOKE whenever asked to audit or review tests, find what's wrong with a suite, judge whether tests are any good, or - check tests for: tests that pass but verify nothing, no/missing - assertions, swallowed exceptions, self-comparing/self-referential/ - tautological assertions (output==input on round-trip/identity ops), - coverage-touching tests (every method called but nothing verified), - broad exceptions, flaky or order-dependent tests (Thread.Sleep, - DateTime.Now, time.sleep, shared state, reflection coupling), - duplicated tests, magic values — in .NET, Python/pytest, TS/Jest, Java, - Go, Ruby or C++. DO NOT USE FOR: writing new tests (use - code-testing-agent); running tests (use run-tests); migration; - assertion-diversity metrics (use assertion-quality); coverage/CRAP - metrics (use coverage-analysis); the testsmells.org academic catalog - (use test-smell-detection). + check for: tests that pass but verify nothing, missing assertions, + swallowed exceptions, self-comparing / tautological assertions, + coverage-touching tests, broad exceptions, flaky or order-dependent tests + (Thread.Sleep, DateTime.Now, shared state), duplicated tests, or magic + values — in .NET, Python/pytest, TS/Jest, Java, Go, Ruby or C++. DO NOT + USE FOR: writing new tests (use code-testing-agent); running tests (use + run-tests); migration; assertion-diversity metrics (use assertion-quality); + coverage/CRAP metrics (use coverage-analysis); the testsmells.org academic + catalog (use test-smell-detection). 
license: MIT --- diff --git a/plugins/dotnet-test/skills/test-tagging/SKILL.md b/plugins/dotnet-test/skills/test-tagging/SKILL.md index 52ca46d407..e2e0af2717 100644 --- a/plugins/dotnet-test/skills/test-tagging/SKILL.md +++ b/plugins/dotnet-test/skills/test-tagging/SKILL.md @@ -1,6 +1,6 @@ --- name: test-tagging -description: "Analyzes test suites in any language and tags each test with a standardized set of traits (positive, negative, critical-path, boundary, smoke, regression, integration, performance, security). Use when the user wants to categorize, audit, or label tests with traits. Works with .NET (MSTest TestCategory / xUnit Trait / NUnit Category / TUnit Property), Python (pytest markers; unittest has no canonical tag syntax so report-only), TypeScript/JavaScript (Jest/Vitest test names, describe-block conventions), Java (JUnit 5 @Tag / TestNG groups), Go (subtest naming / build tags / file _test.go), Ruby (RSpec metadata), Rust (cargo test naming / cfg attributes), Swift (XCTest test plans / Swift Testing @Tag), Kotlin (JUnit @Tag / Kotest tags), PowerShell (Pester -Tag), C++ (GoogleTest filter prefixes / Catch2 [tags] / doctest decorators). Auto-edits when the framework has canonical syntax; falls back to report-only otherwise. Do not use for writing new tests, running tests, or migrating frameworks." +description: "Analyzes test suites in any language and tags each test with standardized traits (positive, negative, critical-path, boundary, smoke, regression, integration, performance, security). Use when the user wants to categorize, audit, or label tests with traits. Works across .NET (MSTest/xUnit/NUnit/TUnit), Python (pytest), TS/JS (Jest/Vitest), Java, Go, Ruby, Rust, Swift, Kotlin, PowerShell, and C++ — auto-editing when the framework has canonical tag syntax, otherwise report-only. Do not use for writing new tests, running tests, or migrating frameworks." license: MIT ---