diff --git a/eng/skill-validator/src/Check/CheckCommand.cs b/eng/skill-validator/src/Check/CheckCommand.cs
index 2dd32ac4e8..c3f52a7c4e 100644
--- a/eng/skill-validator/src/Check/CheckCommand.cs
+++ b/eng/skill-validator/src/Check/CheckCommand.cs
@@ -14,6 +14,12 @@ public static class CheckCommand
         ? StringComparer.OrdinalIgnoreCase
         : StringComparer.Ordinal;
 
+    // A skill with `disable-model-invocation: true` in its frontmatter is
+    // dropped from the Copilot CLI's model-facing skill menu and therefore does
+    // not consume the skill-menu character budget tracked by
+    // SkillProfiler.MaxRenderedSkillMenuLength. The flag is parsed once during
+    // discovery and surfaced on SkillInfo.DisableModelInvocation.
+
     public static Command Create()
     {
         var pluginOpt = new Option<string[]>("--plugin") { Description = "Plugin directories to check (discovers skills, agents, plugin.json)", AllowMultipleArgumentsPerToken = true };
@@ -219,14 +225,26 @@ private static async Task<int> RunPluginCheck(CheckConfig config, CheckReportBui
 
         foreach (var (pluginDirectoryPath, skills) in pluginSkills)
         {
-            int totalChars = skills.Sum(s => s.Description.Length);
-            if (totalChars <= SkillProfiler.MaxAggregateDescriptionLength)
+            // Sum each model-invocable skill's RENDERED menu cost — the full
+            // <skill> block the Copilot CLI emits (name + description + location
+            // + markup), via SkillProfiler.RenderedSkillMenuCost — so this
+            // mirrors the real SKILL_CHAR_BUDGET rather than just the raw
+            // description length.
+            //
+            // Skills hidden from the model-facing skill menu via
+            // `disable-model-invocation: true` do not consume that budget, so
+            // they are excluded from the aggregate (see
+            // SkillProfiler.MaxRenderedSkillMenuLength).
+            int totalChars = skills
+                .Where(s => !s.DisableModelInvocation)
+                .Sum(SkillProfiler.RenderedSkillMenuCost);
+            if (totalChars <= SkillProfiler.MaxRenderedSkillMenuLength)
                 continue;
 
             var pluginResult = builder.Plugins.FirstOrDefault(p => string.Equals(p.DirectoryPath, pluginDirectoryPath, s_pathComparison));
             var pluginLabel = pluginResult?.Name
                 ?? Path.GetFileName(pluginDirectoryPath.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar));
-            var message = $"Plugin '{pluginLabel}' aggregate description size is {totalChars:N0} characters — maximum is {SkillProfiler.MaxAggregateDescriptionLength:N0}.";
+            var message = $"Plugin '{pluginLabel}' rendered skill-menu size is {totalChars:N0} characters — maximum is {SkillProfiler.MaxRenderedSkillMenuLength:N0}.";
             if (pluginResult is not null)
                 pluginResult.Errors.Add(message);
             else
diff --git a/eng/skill-validator/src/Check/SkillProfiler.cs b/eng/skill-validator/src/Check/SkillProfiler.cs
index c20315fd6a..28be7ad95d 100644
--- a/eng/skill-validator/src/Check/SkillProfiler.cs
+++ b/eng/skill-validator/src/Check/SkillProfiler.cs
@@ -32,33 +32,55 @@ public static partial class SkillProfiler
     // vocabularies, but BPE counts are close enough across models for complexity classification.
     private static readonly Lazy<TiktokenTokenizer> s_bpeTokenizer = new(() => TiktokenTokenizer.CreateForModel("gpt-4"));
 
-    // Per-plugin aggregate description size cap. NOTE: this is a local repo
-    // policy, NOT a documented Copilot/agentskills constraint. The agentskills.io
-    // specification (https://agentskills.io/specification) defines per-skill
-    // limits — description (1024 chars, #description-field), compatibility
-    // (500 chars, #compatibility-field), and name (64 chars, #name-field) —
-    // but does NOT define any aggregate per-plugin cap. The original 15,000
-    // was introduced in #238 / discussed in #222 ("15K characters was
-    // mentioned, we could choose smaller") as an informal guardrail against
-    // bloated metadata costs at startup.
+    // Per-plugin rendered skill-menu budget (NOT a raw description-length sum —
+    // see RenderedSkillMenuCost and the notes below). This mirrors a REAL GitHub
+    // Copilot CLI constraint: the CLI renders the model-facing
+    // <available_skills> list under a hard character budget of 15,000
+    // (the agent SDK's SKILL_CHAR_BUDGET, default 15e3 — confirmed in CLI
+    // 1.0.36 and 1.0.61). Skills are listed alphabetically by name and emitted
+    // WITH their full <description> only until that budget is exhausted; every
+    // skill past the cut-off collapses to a bare name with NO description and
+    // therefore can no longer be reliably model-activated. So once a plugin's
+    // rendered skill-menu footprint approaches ~15K, its alphabetically-later
+    // skills silently lose their descriptions — and their discoverability — in
+    // plugin / marketplace contexts. (This is exactly why dotnet-test's
+    // `run-tests` and `test-*` skills stopped activating in plugin eval runs.)
     //
-    // TODO: validate this guardrail against literature (skill-routing studies)
-    // and run experiments measuring whether large aggregate description footprints
-    // actually degrade selection accuracy or just cost more tokens up-front.
-    // Until then, keep the cap aligned with current enforcement as a hard
-    // validation failure, while leaving enough headroom for reasonable plugin growth.
+    // History / correction: this was previously documented here as "a local
+    // repo policy, NOT a documented Copilot/agentskills constraint" and the cap
+    // was raised 15,000 -> 20,000 -> 22,000 to admit plugin growth. That masked
+    // the silent menu truncation instead of fixing it. It is true that the
+    // agentskills.io specification (https://agentskills.io/specification)
+    // defines only per-skill limits — description (1024 chars,
+    // #description-field), compatibility (500 chars, #compatibility-field),
+    // name (64 chars, #name-field) — and no aggregate cap, but the CLI's
+    // runtime skill-menu budget makes 15,000 the effective ceiling regardless.
     //
-    // Raised 20,000 -> 22,000: the dotnet-test plugin (the largest and most
-    // active) reached ~20,400 aggregate chars after adding the
-    // find-untested-sources-polyglot skill, legitimate growth that exceeded the
-    // previous cap. Bumped to restore ~1.6k headroom rather than degrade the
-    // routing keywords of existing skills. Prior precedent: 15,000 -> 20,000.
-    internal const int MaxAggregateDescriptionLength = 22_000;
+    // Notes:
+    //  * The CLI budget is measured over the fully-rendered <skill> blocks
+    //    (name + description + location + markup), NOT the raw descriptions —
+    //    each block adds roughly 94 characters of markup plus the skill name.
+    //    The aggregate below mirrors that rendering via
+    //    RenderedSkillMenuCost(...) (with XML escaping applied), so passing
+    //    check closely tracks the real budget instead of being a lenient
+    //    description-only proxy that could still overflow and silently
+    //    truncate alphabetically-later skills.
+    //  * Skills marked `disable-model-invocation: true` are dropped from the
+    //    CLI menu entirely and do not consume the budget; the aggregate below
+    //    excludes them to match.
+    internal const int MaxRenderedSkillMenuLength = 15_000;
     private const int MaxNameLength = 64;
     internal const int MinDescriptionLength = 10;
     private const int MaxCompatibilityLength = 500;
     private const long MaxAssetFileSize = 5 * 1024 * 1024; // 5 MB
 
+    // Location label the Copilot CLI renders for each skill in the
+    // <available_skills> menu. The value is environment-dependent ("project",
+    // "user", "Custom", ...) but short and roughly constant across skills; we
+    // use a representative value so RenderedSkillMenuCost models the real
+    // per-skill footprint.
+    internal const string SkillMenuLocation = "project";
+
     public static SkillProfile AnalyzeSkill(SkillInfo skill, CheckOptions? options = null)
     {
         var allowRepoTraversal = options?.AllowRepoTraversal ?? false;
@@ -343,6 +365,32 @@ public static IReadOnlyList<string> FormatDiagnosisHints(SkillProfile profile)
             ..profile.Warnings.Select(w => $"  • {w}")];
     }
 
+    /// <summary>
+    /// Approximates the character count one skill adds to the Copilot CLI's
+    /// model-facing skill menu. The skill is rendered as the XML
+    /// <c>&lt;skill&gt;</c> block described for github/copilot-agent-runtime
+    /// (src/skills/skillToolDescription.ts): XML-escaped name and description
+    /// plus a <c>&lt;location&gt;</c> label, followed by a newline separator.
+    /// Measuring the whole block rather than only the description keeps the
+    /// sum compared against <see cref="MaxRenderedSkillMenuLength"/> close to
+    /// the CLI's 15,000-char <c>SKILL_CHAR_BUDGET</c>; the location label is a
+    /// fixed representative value, so the figure is an estimate.
+    /// </summary>
+    internal static int RenderedSkillMenuCost(SkillInfo skill)
+    {
+        string escapedName = EscapeXml(skill.Name);
+        string escapedDescription = EscapeXml(skill.Description);
+        var rendered = $"<skill>\n  <name>{escapedName}</name>\n  <description>{escapedDescription}</description>\n  <location>{SkillMenuLocation}</location>\n</skill>";
+        return rendered.Length + 1; // the extra character is the newline between skill blocks
+    }
+
+    // Escapes the five XML special characters; '&' goes first so the entities
+    // emitted for the other four are not escaped a second time.
+    private static string EscapeXml(string s)
+    {
+        return s.Replace("&", "&amp;").Replace("<", "&lt;").Replace(">", "&gt;").Replace("\"", "&quot;").Replace("'", "&apos;");
+    }
+
     [GeneratedRegex(@"^#{1,4}\s+", RegexOptions.Multiline)]
     private static partial Regex SectionRegex();
 
diff --git a/eng/skill-validator/src/Shared/Models.cs b/eng/skill-validator/src/Shared/Models.cs
index 1b032c5d13..87623ac954 100644
--- a/eng/skill-validator/src/Shared/Models.cs
+++ b/eng/skill-validator/src/Shared/Models.cs
@@ -1,3 +1,5 @@
+using YamlDotNet.Serialization;
+
 namespace SkillValidator.Shared;
 
 // --- MCP server definition (from plugin.json) ---
@@ -18,7 +20,8 @@ public sealed record SkillInfo(
     string Path,
     string SkillMdPath,
     string SkillMdContent,
-    string? Compatibility = null);
+    string? Compatibility = null,
+    bool DisableModelInvocation = false);
 
 // --- Agent info ---
 
@@ -48,6 +51,12 @@ public sealed record SkillFrontmatter
     public string? Name { get; set; }
     public string? Description { get; set; }
     public string? Compatibility { get; set; }
+
+    // The frontmatter key is hyphenated (`disable-model-invocation`) and does not
+    // follow the underscore naming convention, so map it explicitly. A skill with
+    // this set to true is dropped from the Copilot CLI's model-facing skill menu.
+    [YamlMember(Alias = "disable-model-invocation", ApplyNamingConventions = false)]
+    public bool DisableModelInvocation { get; set; }
 }
 
 public sealed record AgentFrontmatter
diff --git a/eng/skill-validator/src/Shared/SkillDiscovery.cs b/eng/skill-validator/src/Shared/SkillDiscovery.cs
index 7916f36714..cba60c1a61 100644
--- a/eng/skill-validator/src/Shared/SkillDiscovery.cs
+++ b/eng/skill-validator/src/Shared/SkillDiscovery.cs
@@ -77,7 +77,8 @@ public static async Task<IReadOnlyList<SkillInfo>> DiscoverSkillsRecursive(strin
             Path: dirPath,
             SkillMdPath: skillMdPath,
             SkillMdContent: skillMdContent,
-            Compatibility: compatibility);
+            Compatibility: compatibility,
+            DisableModelInvocation: metadata.DisableModelInvocation);
     }
 
     internal static (SkillFrontmatter Metadata, string Body) ParseFrontmatter(string content)
diff --git a/eng/skill-validator/src/docs/InvestigatingResults.md b/eng/skill-validator/src/docs/InvestigatingResults.md
index cdc4e7e8ef..685fae5079 100644
--- a/eng/skill-validator/src/docs/InvestigatingResults.md
+++ b/eng/skill-validator/src/docs/InvestigatingResults.md
@@ -216,6 +216,37 @@ Several scenario-level options in `eval.yaml` are relevant when diagnosing failu
 - Make sure the description includes keywords from the scenario
 - Check the scenario itself has sufficient information that the agent can reason that it needs the skill. (It should not cheat and suggest the skill.)
 
+> **Plugin-arm-only non-activation (skill-menu budget overflow).** If a skill
+> activates reliably in the **isolated** arm but consistently fails to activate
+> in the **plugin** arm (`skillActivationIsolated.activated: true` but
+> `skillActivationPlugin.activated: false`, with empty `detectedSkills`), the
+> cause is usually *not* the description text — it may never be shown. The
+> Copilot CLI renders the model-facing `<available_skills>` menu under a hard
+> **15,000-character budget** (the agent SDK's `SKILL_CHAR_BUDGET`, default
+> `15e3`). Skills are listed **alphabetically by name** and emitted with their
+> full `<description>` only until the budget is exhausted; every skill past the
+> cut-off collapses to a **bare name with no description** and can no longer be
+> reliably model-activated. In a large plugin, an alphabetically-late skill
+> (e.g. `run-tests`, `test-*`) silently loses its description in the plugin
+> menu even though it is fine in isolation.
+>
+> Fixes for this case (description tuning will *not* help — the text is not in
+> the menu):
+> - Mark reference / agent-orchestrated skills that are never meant to be
+>   model-invoked from a user prompt with `disable-model-invocation: true`.
+>   The CLI drops them from the menu entirely, freeing budget for the skills
+>   that should be discoverable. (They remain invocable by explicit name.)
+> - Reduce the plugin's aggregate skill-menu footprint so its model-invocable
+>   skills fit under the budget. The `check` command enforces this via
+>   `SkillProfiler.MaxRenderedSkillMenuLength` (15,000), summing each
+>   model-invocable skill's **rendered `<skill>` block** (name + description +
+>   location + markup, via `SkillProfiler.RenderedSkillMenuCost`) — not just the
+>   raw description — and counting only skills *without*
+>   `disable-model-invocation: true`. Counting the rendered block makes passing
+>   `check` a close (not exact) proxy for "fits in the real CLI menu budget".
+> - As a last resort, consolidate overlapping skills so the plugin exposes
+>   fewer model-invocable entries.
+
 ### 6. Rubric penalizes valid alternatives
 
 **Symptoms:**
diff --git a/eng/skill-validator/tests/Check/CheckCommandTests.cs b/eng/skill-validator/tests/Check/CheckCommandTests.cs
index 13f6b6dec2..6a71dfe483 100644
--- a/eng/skill-validator/tests/Check/CheckCommandTests.cs
+++ b/eng/skill-validator/tests/Check/CheckCommandTests.cs
@@ -50,10 +50,32 @@ public async Task UnderAggregateLimit_Passes()
     }
 
     [Fact]
-    public async Task AtAggregateLimit_Passes()
+    public void RenderedSkillMenuCost_CountsEscapedNameDescriptionLocationAndMarkup()
     {
-        // Create skills whose descriptions sum exactly to the limit, each under per-skill max (1024)
-        int limit = SkillProfiler.MaxAggregateDescriptionLength;
+        var skill = new SkillInfo(
+            Name: "my-skill",
+            Description: "Tom & Jerry <tag>",
+            Path: "",
+            SkillMdPath: "",
+            SkillMdContent: "");
+
+        // Mirrors github/copilot-agent-runtime skillToolDescription.ts: the full
+        // <skill> block (XML-escaped name + description, plus location/markup)
+        // followed by a single newline separator.
+        string expectedBlock =
+            $"<skill>\n  <name>my-skill</name>\n  <description>Tom &amp; Jerry &lt;tag&gt;</description>\n  <location>{SkillProfiler.SkillMenuLocation}</location>\n</skill>";
+
+        Assert.Equal(expectedBlock.Length + 1, SkillProfiler.RenderedSkillMenuCost(skill));
+    }
+
+    [Fact]
+    public async Task DescriptionsSummingToLimit_Fails_BecauseRenderedOverheadIsCounted()
+    {
+        // Descriptions ALONE sum to exactly the cap. The previous check (which
+        // counted only Description.Length) treated this as "at limit → pass",
+        // but the real CLI budget also includes each skill's name, location and
+        // <skill> markup, so the rendered total exceeds the cap and must fail.
+        int limit = SkillProfiler.MaxRenderedSkillMenuLength;
         int perSkill = 1024;
         int skillCount = limit / perSkill;
         int remainder = limit - (skillCount * perSkill);
@@ -69,7 +91,7 @@ public async Task AtAggregateLimit_Passes()
         {
             var config = new CheckConfig { PluginPaths = [Path.Combine(root, "test-plugin")] };
             var result = await CheckCommand.Run(config);
-            Assert.Equal(0, result);
+            Assert.Equal(1, result);
         }
         finally { Directory.Delete(root, true); }
     }
@@ -77,7 +99,7 @@ public async Task AtAggregateLimit_Passes()
     [Fact]
     public async Task OverAggregateLimit_Fails()
     {
-        int limit = SkillProfiler.MaxAggregateDescriptionLength;
+        int limit = SkillProfiler.MaxRenderedSkillMenuLength;
         int perSkill = 1024;
         // Enough skills to exceed the aggregate limit
         int skillCount = (limit / perSkill) + 1;
diff --git a/eng/skill-validator/tests/Shared/DiscoveryTests.cs b/eng/skill-validator/tests/Shared/DiscoveryTests.cs
index 43479e685c..b5133698dd 100644
--- a/eng/skill-validator/tests/Shared/DiscoveryTests.cs
+++ b/eng/skill-validator/tests/Shared/DiscoveryTests.cs
@@ -82,6 +82,44 @@ public async Task DiscoverSkillsRecursiveReturnsEmptyForMissingDir()
     }
 }
 
+public class ParseFrontmatterTests
+{
+    [Fact]
+    public void DisableModelInvocation_True_WhenTopLevelKeySet()
+    {
+        const string skillMd = "---\nname: my-skill\ndescription: A skill.\ndisable-model-invocation: true\n---\nBody";
+        var parsed = SkillDiscovery.ParseFrontmatter(skillMd);
+        Assert.True(parsed.Metadata.DisableModelInvocation);
+    }
+
+    [Fact]
+    public void DisableModelInvocation_False_WhenKeyAbsent()
+    {
+        const string skillMd = "---\nname: my-skill\ndescription: A skill.\n---\nBody";
+        var parsed = SkillDiscovery.ParseFrontmatter(skillMd);
+        Assert.False(parsed.Metadata.DisableModelInvocation);
+    }
+
+    [Fact]
+    public void DisableModelInvocation_False_WhenKeyAppearsInsideBlockScalarDescription()
+    {
+        // Regression guard: an earlier regex-based check scanned every YAML line,
+        // so a block-scalar description that merely mentions the key on its own
+        // line was misread as disabling model invocation. Real YAML parsing must
+        // treat that indented line as description text, not as a top-level key.
+        const string skillMd =
+            "---\n" +
+            "name: my-skill\n" +
+            "description: |\n" +
+            "  This skill explains config options.\n" +
+            "  disable-model-invocation: true\n" +
+            "---\n" +
+            "Body";
+        var parsed = SkillDiscovery.ParseFrontmatter(skillMd);
+        Assert.False(parsed.Metadata.DisableModelInvocation);
+    }
+}
+
 public class PluginDiscoveryTests
 {
     [Fact]
diff --git a/plugins/dotnet-test/skills/assertion-quality/SKILL.md b/plugins/dotnet-test/skills/assertion-quality/SKILL.md
index f1235d8160..6f9c6ad7f9 100644
--- a/plugins/dotnet-test/skills/assertion-quality/SKILL.md
+++ b/plugins/dotnet-test/skills/assertion-quality/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: assertion-quality
-description: "Analyzes the variety and depth of assertions across test suites in any language. Use when the user asks to evaluate assertion quality, find shallow tests, identify assertion-free tests (no assertions or only trivial ones like Assert.IsNotNull / toBeTruthy() / assert x is not None), flag self-referential or tautological assertions (output equals input on round-trip operations), measure assertion diversity, or audit whether tests verify different facets of behavior. Polyglot: .NET, Python (pytest), TS/JS (Jest/Vitest), Java, Go, Ruby, Rust, Swift, Kotlin, PowerShell, C++. DO NOT USE FOR: writing new tests (use code-testing-agent / writing-mstest-tests), checking whether tests would catch a bug if code changed (mutation reasoning — use test-gap-analysis), anti-patterns like flakiness or duplication, or a general severity-ranked anti-pattern audit even when focused on self-referential / tautological assertions and not asking for assertion-diversity metrics (use test-anti-patterns); fixing assertions."
+description: "Analyzes the variety and depth of assertions across test suites in any language. Use when the user asks to evaluate assertion quality, find shallow tests, identify assertion-free tests (no assertions or only trivial ones like Assert.IsNotNull / toBeTruthy()), flag self-referential or tautological assertions, measure assertion diversity, or audit whether tests verify different facets of behavior. Polyglot: .NET, Python, TS/JS, Java, Go, Ruby, Rust, Swift, Kotlin, PowerShell, C++. DO NOT USE FOR: writing new tests (use code-testing-agent / writing-mstest-tests), mutation reasoning about whether tests would catch a bug (use test-gap-analysis), or a general severity-ranked anti-pattern audit (use test-anti-patterns); fixing assertions."
 license: MIT
 ---
 
diff --git a/plugins/dotnet-test/skills/coverage-analysis/SKILL.md b/plugins/dotnet-test/skills/coverage-analysis/SKILL.md
index 2193c48e29..a6de1e03ad 100644
--- a/plugins/dotnet-test/skills/coverage-analysis/SKILL.md
+++ b/plugins/dotnet-test/skills/coverage-analysis/SKILL.md
@@ -9,13 +9,10 @@ description: >
   USE FOR: coverage stuck, coverage plateau, can't increase coverage, what's
   blocking coverage, coverage gap, CRAP scores, risk hotspots, where to add
   tests, coverage analysis, coverage report.
-  DO NOT USE FOR: targeted single-method CRAP analysis (use crap-score),
-  auditing test code for the "coverage-touching" anti-pattern (tests that
-  execute / call code but assert nothing, inflating coverage without
-  verifying behavior) — that is a test-code quality audit, use
-  test-anti-patterns; writing tests; running tests without coverage, or
-  troubleshooting test execution (use run-tests). This skill requires or
-  produces coverage (Cobertura) and CRAP metrics.
+  DO NOT USE FOR: targeted single-method CRAP analysis (use crap-score);
+  auditing test code for coverage-touching or other anti-patterns (use
+  test-anti-patterns); writing tests; running tests (use run-tests). Requires
+  or produces coverage (Cobertura) and CRAP metrics.
 license: MIT
 ---
 
diff --git a/plugins/dotnet-test/skills/find-untested-sources-polyglot/SKILL.md b/plugins/dotnet-test/skills/find-untested-sources-polyglot/SKILL.md
deleted file mode 100644
index f51251ce24..0000000000
--- a/plugins/dotnet-test/skills/find-untested-sources-polyglot/SKILL.md
+++ /dev/null
@@ -1,205 +0,0 @@
----
-name: find-untested-sources-polyglot
-description: >
-  Polyglot, parse-only static analysis that pairs source files with
-  referencing tests across Python, TypeScript/JavaScript, Go, Java, Rust,
-  C#, and Ruby. JSON shape matches `find-untested-sources`.
-  USE FOR: where to write tests next, find untested files, list sources
-  without tests, polyglot test-pairing map.
-  DO NOT USE FOR: coverage, CRAP risk. For .NET-only repos prefer
-  `find-untested-sources`.
-disable-model-invocation: true
-license: MIT
----
-
-# Find Untested Sources (Polyglot)
-
-## Purpose
-
-Coverage tools answer "which lines were executed?" — they require a green build
-and a passing test run, which is minutes-to-tens-of-minutes on a real repo.
-The question this skill answers is different and much cheaper:
-
-> _Which source files have no test file referencing any of their declared
-> symbols?_
-
-That's the question an agent asks **before** writing a new test — and it can be
-answered statically in a few seconds by parsing every recognized source file
-with [tree-sitter](https://tree-sitter.github.io/), with **no build, no
-dependency resolution, no compilation**.
-
-This is the polyglot sibling of the C# `find-untested-sources` skill. The
-output schema is intentionally compatible so the same prompt patterns can
-consume either tool.
-
-## When to Use
-
-- The repository is not exclusively C#, or you want a tool that works
-  uniformly across multiple languages without per-language plumbing.
-- User asks "where should I add tests?", "which files have no tests?", "find
-  untested code", "give me a test gap list", "what's the next file to test".
-- Before invoking a test-generation agent, to produce a prioritized worklist.
-- After generating tests, to verify each new test file pairs to a source file.
-
-## When Not to Use
-
-- **C#-only repo** — prefer `find-untested-sources`. Its Roslyn-based
-  namespace disambiguation is strictly better than this skill's identifier
-  overlap on duplicated short names like `Settings` or `Context`.
-- **Line/branch coverage** — use language-native coverage tooling.
-- **Are existing tests strong?** — use `test-gap-analysis` or
-  `assertion-quality`.
-
-## Inputs
-
-| Input | Required | Default | Description |
-|-------|----------|---------|-------------|
-| Repo root | Yes | — | Directory to scan recursively. |
-| `--lang LANG` | No | all | Restrict to a language (repeatable). One of `python`, `typescript`, `tsx`, `javascript`, `go`, `java`, `rust`, `csharp`, `ruby`. |
-| `--limit-untested N` | No | 0 (no limit) | Truncate the untested list to N entries. |
-| `--include-tested` | No | off | Include `tested_sources` in the payload (large). |
-
-### Prerequisites
-
-- Python 3.10+.
-- `pip install tree-sitter-language-pack` (single self-contained wheel that
-  bundles parsers for 300+ languages and the high-level `process()` API used
-  here). No native build, no per-language grammar install.
-
-## Usage
-
-```powershell
-# From the skill folder
-python scripts/find_untested_sources.py <repo-root>
-
-# Restrict to a language
-python scripts/find_untested_sources.py <repo-root> --lang python --lang typescript
-
-# Truncate the report (top 20 by declared API surface)
-python scripts/find_untested_sources.py <repo-root> --limit-untested 20 > pairing.json
-
-# Iterate, highest-API-surface first
-$report = Get-Content pairing.json | ConvertFrom-Json
-$report.untested_sources | Select-Object -First 10 path, declaration_count, suggested_test_path
-```
-
-Diagnostics go to stderr; JSON goes to stdout.
-
-## Output Schema
-
-```jsonc
-{
-  "repo_root": "<absolute path>",
-  "summary": {
-    "source_files": 3138,
-    "test_files": 761,
-    "tested_source_files": 1419,
-    "untested_source_files": 1719,
-    "orphan_test_files": 15,
-    "languages": ["csharp"]
-  },
-  "untested_sources": [
-    {
-      "path": "src/Foo/Bar.cs",
-      "language": "csharp",
-      "declaration_count": 8,
-      "declarations": ["Bar", "BarOptions", "IBar", "..."],
-      "suggested_test_path": "src/Foo/BarTests.cs"
-    }
-  ],
-  "orphan_tests": [
-    { "path": "tests/SomeIntegrationTest.cs", "language": "csharp" }
-  ]
-}
-```
-
-Pass `--include-tested` to additionally emit `tested_sources` (same shape as
-`untested_sources` but with a `covering_tests` array instead of a suggested
-path). Omitted by default to keep the payload small for LLM consumption.
-
-## How It Works
-
-1. **File discovery** — recursive directory walk pruning common build/vendor
-   dirs (`bin`, `obj`, `node_modules`, `target`, `dist`, `build`, `vendor`,
-   `__pycache__`, `.venv`, `.git`, etc.). Skips generated files (`.d.ts`,
-   `.g.cs`, `.Designer.cs`, `_pb2.py`, `*.min.js`, `AssemblyInfo.cs`, ...).
-
-2. **Language detection** — `tree_sitter_language_pack.detect_language_from_path`
-   maps the extension to one of the supported languages. Unknown extensions
-   are skipped silently.
-
-3. **Test-vs-source classification** — per-language path heuristics:
-
-   | Language | Test rule |
-   |---|---|
-   | Python | path contains `tests/` or `test/`; or filename starts with `test_` or ends with `_test.py`; or `conftest.py`. |
-   | JS/TS/TSX | path contains `__tests__`, `tests`, `test`, `spec`, or `e2e`; or filename contains `.test.` or `.spec.`. |
-   | Go | filename ends with `_test.go` (Go's standard convention). |
-   | Java | path contains `test` or `tests`; or filename ends with `Test.java` / `Tests.java`. |
-   | Rust | path contains `tests/` or `benches/`. |
-   | C# | path contains `tests/`; or project segment ends with `.Tests`, `.Test`, `.UnitTests`, `.IntegrationTests`; or filename ends with `Tests`/`Test`. |
-   | Ruby | path contains `spec/`, `test/`; or filename ends with `_spec.rb` / `_test.rb`. |
-
-4. **Per-file extraction** — `tree_sitter_language_pack.process(text,
-   ProcessConfig(language=lang, structure=True, imports=True, symbols=True))`
-   returns:
-   - `structure` — top-level declared items (functions, classes, methods,
-     traits, ...) with their names. Used as the declared-symbol set.
-   - `imports` — raw import statements (e.g. `from foo import bar`,
-     `import "pkg/util"`, `using System.IO;`, `use crate::foo::Bar;`).
-   - `symbols` — flat declared-name list, unioned with `structure` (acts as
-     a fallback when `structure` is empty, and broadens coverage when both
-     are populated; declaration counts may exceed pure structure parsing).
-
-5. **Pairing** — for each test file, union the results of:
-   - **Import resolution** (per language):
-     - Python: `from pkg.mod import x` → `pkg/mod.py` or
-       `pkg/mod/__init__.py`.
-     - TS/JS: relative `./foo` / `../bar` → with `.ts`/`.tsx`/`.js`/`.jsx`
-       and `/index.*` candidate paths.
-     - Go: `"path/to/pkg"` → any source file whose final path segment
-       matches `pkg.go` in the index.
-     - Java: `import a.b.C;` → `a/b/C.java`.
-     - Rust: `use a::b::C;` → `b.rs` or `C.rs` (best-effort, no module tree).
-     - Ruby: `require 'foo/bar'` → `foo/bar.rb`.
-     - C#: `using` maps to namespaces, not files; intentionally a no-op —
-       falls through to identifier overlap below.
-   - **Identifier overlap** — every word-like token in the test source is
-     looked up in the source index of declared names (length ≥ 4 to keep
-     noise down). Any source whose declared name appears as a token in the
-     same-language test is paired.
-
-6. **JSON emit** — `untested_sources` is ordered by declaration count
-   descending so the highest-API-surface gap appears first.
-
-## Limitations
-
-This is a static, parse-only heuristic. It deliberately trades a small amount
-of accuracy for orders-of-magnitude lower cost than coverage. Known gaps:
-
-- **Reflection / DI-resolved types** that a test only references through a
-  string name or container resolution don't appear in the identifier scan.
-- **C#** specifically: namespace disambiguation is the C# tool's strength;
-  this polyglot version intentionally skips it. If you have a .NET-only
-  repository, prefer the Roslyn-based `find-untested-sources`.
-- **Short identifier names** (< 4 chars) are dropped from the overlap index
-  to avoid noisy pairings on names like `id`, `db`, `Tag`.
-- **Cross-language tests** (Python tests driving a Go binary, etc.) are
-  recorded as orphan tests since same-language pairing is the rule.
-- **Monorepo path aliases** (TS path mapping, Java module-info) are not
-  resolved; the suffix-match fallback may pick the wrong source if two files
-  share a trailing path segment in different sub-projects.
-
-For these cases, run actual coverage on the unpaired candidates the agent
-has already triaged.
-
-## Outputs the agent should consume
-
-- `untested_sources[*].path` — pick the next source file to test (highest
-  `declaration_count` first).
-- `untested_sources[*].suggested_test_path` — drop-in target for the new
-  test file using the per-language convention.
-- (With `--include-tested`) `tested_sources[*].covering_tests` — verify a
-  newly written test file lands in the list for the intended source.
-- `orphan_tests` — tests that don't appear to reference any same-language
-  source file; useful for triaging stale tests or integration-only tests.
diff --git a/plugins/dotnet-test/skills/find-untested-sources/SKILL.md b/plugins/dotnet-test/skills/find-untested-sources/SKILL.md
index 4b722fd1ca..14cd1420d1 100644
--- a/plugins/dotnet-test/skills/find-untested-sources/SKILL.md
+++ b/plugins/dotnet-test/skills/find-untested-sources/SKILL.md
@@ -1,14 +1,14 @@
 ---
 name: find-untested-sources
 description: >
-  Parse-only C# analysis that pairs source files with referencing tests and
-  emits JSON: `source_to_tests`, `untested` ordered by declaration count, and
-  `suggested_test_path` from `ProjectReference` edges.
-  USE FOR: where to write tests next, find untested files, list sources
-  without tests, build a test-pairing map.
-  DO NOT USE FOR: coverage (use `coverage-analysis`), CRAP risk ranking,
-  assertion gaps.
-disable-model-invocation: true
+  Parse-only static analysis that pairs source files with the tests referencing
+  them and emits JSON listing untested files ordered by API surface, each with a
+  suggested_test_path. Roslyn engine for C#/.NET (namespace-aware), tree-sitter
+  engine for polyglot repos (Python, TS/JS, Go, Java, Rust, Ruby).
+  USE FOR: where to write tests next, which files have no tests, find untested
+  code, build a source-to-test pairing map, prioritized test-gap worklist.
+  DO NOT USE FOR: line/branch coverage or CRAP risk (use coverage-analysis);
+  whether existing tests are strong (use test-gap-analysis or assertion-quality).
 license: MIT
 ---
 
@@ -17,16 +17,29 @@ license: MIT
 ## Purpose
 
 Coverage tools answer "which lines were executed?" — they require a green build
-and a passing test run, which is minutes-to-tens-of-minutes on a real repo.
-The question this skill answers is different and much cheaper:
+and a passing test run, which is minutes-to-tens-of-minutes on a real repo. The
+question this skill answers is different and much cheaper:
 
-> _Which C# source files have no test file referencing any of their declared types?_
+> _Which source files have no test file referencing any of their declared
+> types/symbols?_
 
 That's the question an agent asks **before** writing a new test — and it can be
-answered statically in a few seconds by parsing every `.cs` file with the
-Roslyn syntax API, with **no `Compilation`, no `MetadataReference`, and no
-binding**. The output is a deterministic test-pairing map that lets the agent
-pick the next file to test without reading the entire codebase first.
+answered statically in a few seconds by parsing source files, with **no build,
+no dependency resolution, and no compilation**. The output is a deterministic
+test-pairing map that lets the agent pick the next file to test without reading
+the entire codebase first.
+
+## Two engines — pick one
+
+This skill ships two analyzers with similar (but not identical) JSON reports:
+
+| Engine | Script | Use when |
+|--------|--------|----------|
+| **Roslyn (C#)** | `scripts/Find-UntestedSources.cs` | The repo is **.NET-only**. Parses every `.cs` file with the Roslyn syntax API and does strict **namespace disambiguation**, so it is materially more accurate on duplicated short names like `Settings` or `Context`. |
+| **tree-sitter (polyglot)** | `scripts/find_untested_sources.py` | The repo is **not exclusively C#**, or you want one tool across Python, TypeScript/JavaScript, Go, Java, Rust, Ruby, and C#. |
+
+For a .NET-only repository, **prefer the Roslyn engine** — its namespace-aware
+pairing beats the polyglot engine's identifier overlap.
 
 ## When to Use
 
@@ -34,7 +47,7 @@ pick the next file to test without reading the entire codebase first.
   untested code", "give me a test gap list", "what's the next file to test".
 - Before invoking a test-generation agent, to produce a prioritized worklist.
 - After generating tests, to verify each new test file pairs to a source file.
-- To enumerate "weakly paired" source files (only one referring test file) for
+- To enumerate "weakly paired" source files (only one referring test) for
   follow-up depth checks.
 
 ## When Not to Use
@@ -43,23 +56,17 @@ pick the next file to test without reading the entire codebase first.
 - **CRAP-score / risk hotspots** — use `coverage-analysis`.
 - **Are existing tests strong?** — use `test-gap-analysis` (mutation reasoning)
   or `assertion-quality`.
-- **Tests for non-C# code** — this prototype is C#-only.
-
-## Inputs
 
-| Input | Required | Default | Description |
-|-------|----------|---------|-------------|
-| Repo root | Yes | — | Directory to scan recursively for `.cs` files. |
-| `--top N` | No | all | Truncate the `untested` list to the top N entries by declaration count. |
+## Roslyn engine (C#)
 
 ### Prerequisites
 
-- .NET SDK that supports file-based apps (`dotnet run script.cs`). Pinned in
-  the repo's `global.json` (SDK 11 preview or later).
+- .NET SDK that supports file-based apps (`dotnet run script.cs`). Pinned in the
+  repo's `global.json` (SDK 11 preview or later).
 - No internet access required beyond the initial NuGet restore of
   `Microsoft.CodeAnalysis.CSharp` on first run.
 
-## Usage
+### Usage
 
 ```powershell
 # From the skill folder
@@ -75,7 +82,7 @@ $report.untested | Select-Object -First 10 source, decl_count, suggested_test_pa
 
 Diagnostics go to stderr; JSON goes to stdout.
 
-## Output Schema
+### Output schema
 
 ```jsonc
 {
@@ -104,68 +111,149 @@ Diagnostics go to stderr; JSON goes to stdout.
 }
 ```
 
-## How It Works
-
-1. **File discovery** — recursive directory walk pruning `bin/`, `obj/`,
-   `node_modules/`, `.git/`, `.vs/`, `packages/`, and any dotted subdir.
-   Skips generated files (`.g.cs`, `.Designer.cs`, `.AssemblyInfo.cs`).
+### How it works
 
+1. **File discovery** — recursive walk pruning `bin/`, `obj/`, `node_modules/`,
+   `.git/`, `.vs/`, `packages/`, and any dotted subdir. Skips generated files
+   (`.g.cs`, `.Designer.cs`, `.AssemblyInfo.cs`).
 2. **Test vs source classification** — walks up to the nearest `.csproj` and
-   marks it as a test project if (a) the project name ends in `.Tests`,
-   `.Test`, `.UnitTests`, `.IntegrationTests`, `.E2E`, `.EndToEnd`, `.Spec`,
-   `.Specs`, or (b) the file content references `Microsoft.NET.Test.Sdk`,
-   `MSTest.Sdk`, `Microsoft.Testing.Platform`, `xunit`, `NUnit`, `TUnit`, or
+   marks it a test project if the project name ends in `.Tests`, `.Test`,
+   `.UnitTests`, `.IntegrationTests`, `.E2E`, `.EndToEnd`, `.Spec`, `.Specs`, or
+   the content references `Microsoft.NET.Test.Sdk`, `MSTest.Sdk`,
+   `Microsoft.Testing.Platform`, `xunit`, `NUnit`, `TUnit`, or
    `<IsTestProject>true</IsTestProject>`.
-
-3. **Source index (parallel)** — for each source file, parse with
-   `CSharpSyntaxTree.ParseText` (syntax only, no compilation). Walk every
-   `BaseTypeDeclarationSyntax` and `DelegateDeclarationSyntax` and record
+3. **Source index (parallel)** — parse each source file with
+   `CSharpSyntaxTree.ParseText` (syntax only, no compilation); record every
+   `BaseTypeDeclarationSyntax` / `DelegateDeclarationSyntax` as
    `(ShortName, EnclosingNamespace, FilePath)`.
+4. **Test scan (parallel)** — parse each test file, collect `using` directives +
+   enclosing namespace, walk every `IdentifierToken`, look it up in the
+   short-name index, and **disambiguate strictly**: an identifier is attributed
+   only if the declaration's namespace matches one of the test file's `using`
+   directives, the enclosing namespace, or a prefix of them. This avoids noise
+   where common names like `Settings` or `Context` match every project.
+5. **Pairing & suggestion** — invert into `source → [tests]`. Build a
+   production-to-test project map from `<ProjectReference>` entries; for each
+   untested source, mirror its in-project relative path under the referencing
+   test project to suggest a path.
+6. **JSON emit** — ordered by declaration count desc, then alphabetical.
 
-4. **Test scan (parallel)** — for each test file, parse, collect `using`
-   directives + enclosing namespace, walk every `IdentifierToken`, look it up
-   in the short-name index, and **disambiguate strictly**: an identifier is
-   attributed to a declaration only if the declaration's namespace matches one
-   of the test file's `using` directives, the enclosing namespace, or a
-   prefix of them. Identifiers that don't resolve under that constraint are
-   dropped (avoids the noise where common names like `Settings` or `Context`
-   would otherwise match every project that happens to declare them).
+## Polyglot engine (tree-sitter)
 
-5. **Pairing & suggestion** — invert into `source → [tests]`. Build a
-   production-to-test project map from `<ProjectReference>` entries in test
-   `.csproj` files; for each untested source, mirror its in-project relative
-   path under the referencing test project and append `Tests.cs` to suggest a
-   path.
+### Prerequisites
 
-6. **JSON emit** — ordered by declaration count desc, then alphabetical.
+- Python 3.10+.
+- `pip install tree-sitter-language-pack` (single self-contained wheel that
+  bundles parsers for 300+ languages and the high-level `process()` API). No
+  native build, no per-language grammar install.
+
+### Usage
+
+```powershell
+# From the skill folder
+python scripts/find_untested_sources.py <repo-root>
+
+# Restrict to a language (repeatable)
+python scripts/find_untested_sources.py <repo-root> --lang python --lang typescript
+
+# Truncate the report (top 20 by declared API surface)
+python scripts/find_untested_sources.py <repo-root> --limit-untested 20 > pairing.json
+
+# Iterate, highest-API-surface first
+$report = Get-Content pairing.json | ConvertFrom-Json
+$report.untested_sources | Select-Object -First 10 path, declaration_count, suggested_test_path
+```
+
+Pass `--include-tested` to additionally emit `tested_sources` (omitted by
+default to keep the payload small for LLM consumption). Diagnostics go to
+stderr; JSON goes to stdout.
+
+### Output schema
+
+```jsonc
+{
+  "repo_root": "<absolute path>",
+  "summary": {
+    "source_files": 3138,
+    "test_files": 761,
+    "tested_source_files": 1419,
+    "untested_source_files": 1719,
+    "orphan_test_files": 15,
+    "languages": ["csharp"]
+  },
+  "untested_sources": [
+    {
+      "path": "src/Foo/Bar.cs",
+      "language": "csharp",
+      "declaration_count": 8,
+      "declarations": ["Bar", "BarOptions", "IBar", "..."],
+      "suggested_test_path": "src/Foo/BarTests.cs"
+    }
+  ],
+  "orphan_tests": [
+    { "path": "tests/SomeIntegrationTest.cs", "language": "csharp" }
+  ]
+}
+```
+
+### How it works
+
+1. **File discovery** — recursive walk pruning common build/vendor dirs (`bin`,
+   `obj`, `node_modules`, `target`, `dist`, `build`, `vendor`, `__pycache__`,
+   `.venv`, `.git`, …) and generated files (`.d.ts`, `.g.cs`, `.Designer.cs`,
+   `_pb2.py`, `*.min.js`, `AssemblyInfo.cs`, …).
+2. **Language detection** — `detect_language_from_path` maps the extension to a
+   supported language; unknown extensions are skipped.
+3. **Test-vs-source classification** — per-language path heuristics:
+
+   | Language | Test rule |
+   |---|---|
+   | Python | path contains `tests/`/`test/`; or filename starts with `test_` or ends `_test.py`; or `conftest.py`. |
+   | JS/TS/TSX | path contains `__tests__`, `tests`, `test`, `spec`, `e2e`; or filename contains `.test.`/`.spec.`. |
+   | Go | filename ends `_test.go`. |
+   | Java | path contains `test`/`tests`; or filename ends `Test.java`/`Tests.java`. |
+   | Rust | path contains `tests/`/`benches/`. |
+   | C# | path contains `tests/`; or project segment ends `.Tests`/`.Test`/`.UnitTests`/`.IntegrationTests`; or filename ends `Tests`/`Test`. |
+   | Ruby | path contains `spec/`/`test/`; or filename ends `_spec.rb`/`_test.rb`. |
+
+4. **Per-file extraction** — `process(text, ProcessConfig(structure, imports,
+   symbols))` returns declared items, raw import statements, and a flat
+   declared-name list.
+5. **Pairing** — for each test file, union **import resolution** (per language,
+   e.g. Python `from pkg.mod import x` → `pkg/mod.py`; Java `import a.b.C;` →
+   `a/b/C.java`; C# `using` names a namespace, not a file, so a no-op) with **identifier
+   overlap** (word-like tokens, length ≥ 4, matched against declared names).
+6. **JSON emit** — `untested_sources` ordered by declaration count descending.
 
 ## Limitations (be honest with the agent)
 
-This is a static, parse-only heuristic. It deliberately trades a small amount
-of accuracy for orders-of-magnitude lower cost than coverage. Known gaps:
-
-- **Reflection-driven tests** that exercise a type only via
-  `Type.GetType(...)` / `Activator.CreateInstance` won't be detected — the
-  type's short name never appears in the test source.
-- **DI-resolved types** referenced only by `IServiceProvider.GetRequiredService<T>()`
-  where `T` is an interface and the implementation isn't named in the test.
-- **Extension methods** invoked as instance methods. The extension class is
-  not named, only the method, so the source file declaring the static class
-  is not credited.
-- **`var`, target-typed `new()`, and pattern matching** lose the type token;
-  the file-level union usually still catches it through other references.
-- **Cross-language**: any source file driven by JSON/YAML test fixtures, code
-  generators, or compiled-only references is not detected.
+Both engines are static, parse-only heuristics that trade a little accuracy for
+orders-of-magnitude lower cost than coverage. Known gaps:
+
+- **Reflection / DI-resolved types** referenced only via a string name or
+  container resolution won't be detected — the type's short name never appears
+  in the test source.
+- **Extension methods** invoked as instance methods (C#): the declaring static
+  class is not named, so its file is not credited.
+- **`var`, target-typed `new()`, pattern matching** lose the type token; the
+  file-level union usually still catches it through other references.
+- **Short identifier names** (polyglot, < 4 chars) are dropped to avoid noisy
+  pairings on names like `id`, `db`, `Tag`.
+- **Monorepo path aliases** (TS path mapping, Java module-info) are not
+  resolved; a suffix-match fallback may pick the wrong source if two files share
+  a trailing path segment.
 
 For these cases, run actual coverage (`coverage-analysis`) on the unpaired
 candidates the agent has already triaged.
 
 ## Outputs the agent should consume
 
-- `untested[*].source` — pick the next source file to test (highest
-  `decl_count` first).
-- `untested[*].suggested_test_path` — drop-in target for the new test file;
-  honors the test project that already `<ProjectReference>`s the source's
+- `untested[*].source` / `untested_sources[*].path` — pick the next source file
+  to test (highest declaration count first).
+- `*.suggested_test_path` — drop-in target for the new test file; the Roslyn
+  engine honors the test project that already `<ProjectReference>`s the source's
   project, so `dotnet sln add` is not needed.
-- `source_to_tests` — verify a newly written test file lands in the list for
-  the intended source.
+- `source_to_tests` (Roslyn) / `tested_sources` (polyglot, with `--include-tested`) —
+  verify a newly written test file lands in the list for the intended source.
+- `orphan_tests` (polyglot) — tests that don't reference any same-language
+  source file; useful for triaging stale or integration-only tests.
diff --git a/plugins/dotnet-test/skills/find-untested-sources-polyglot/scripts/find_untested_sources.py b/plugins/dotnet-test/skills/find-untested-sources/scripts/find_untested_sources.py
similarity index 100%
rename from plugins/dotnet-test/skills/find-untested-sources-polyglot/scripts/find_untested_sources.py
rename to plugins/dotnet-test/skills/find-untested-sources/scripts/find_untested_sources.py
diff --git a/plugins/dotnet-test/skills/grade-tests/SKILL.md b/plugins/dotnet-test/skills/grade-tests/SKILL.md
index 95ea216bb9..be5533d0d8 100644
--- a/plugins/dotnet-test/skills/grade-tests/SKILL.md
+++ b/plugins/dotnet-test/skills/grade-tests/SKILL.md
@@ -6,12 +6,10 @@ description: >
   score band, and a one-line note — designed to be posted as a PR comment.
   Use when the caller wants per-test feedback on a curated list of methods
   (for example, the new or modified tests in a pull request), not a
-  suite-wide audit. Polyglot: .NET (MSTest/xUnit/NUnit/TUnit), Python
-  (pytest/unittest), TS/JS (Jest/Vitest/Mocha/node:test), Java (JUnit/TestNG),
-  Go, Ruby (RSpec/Minitest), Rust, Swift (XCTest/Swift Testing), Kotlin
-  (JUnit/Kotest), PowerShell (Pester), C++ (GoogleTest/Catch2/doctest).
-  Input is a list of test methods (or method bodies / file+line spans);
-  output is a compact markdown table plus a short summary. DO NOT USE FOR:
+  suite-wide audit. Polyglot: .NET, Python, TS/JS, Java, Go, Ruby, Rust,
+  Swift, Kotlin, PowerShell, C++. Input is a list of test methods (or method
+  bodies / file+line spans); output is a compact markdown table plus a short
+  summary. DO NOT USE FOR:
   full suite audits (use test-quality-auditor agent or test-anti-patterns),
   writing new tests (use code-testing-generator agent or writing-mstest-tests),
   fixing failures, or measuring code coverage.
diff --git a/plugins/dotnet-test/skills/migrate-static-to-wrapper/SKILL.md b/plugins/dotnet-test/skills/migrate-static-to-wrapper/SKILL.md
index 98add4909a..9234863cb0 100644
--- a/plugins/dotnet-test/skills/migrate-static-to-wrapper/SKILL.md
+++ b/plugins/dotnet-test/skills/migrate-static-to-wrapper/SKILL.md
@@ -6,12 +6,11 @@ description: >
   Performs codemod-style bulk replacement of DateTime.UtcNow to TimeProvider.GetUtcNow(),
   File.ReadAllText to IFileSystem, and similar transformations. Adds constructor
   injection parameters and updates DI registration.
-  USE FOR: replace DateTime.UtcNow with TimeProvider, replace DateTime.Now with
-  TimeProvider, migrate static calls to wrapper, bulk replace File.* with IFileSystem,
-  codemod static to injectable, add constructor injection for time provider,
-  mechanical migration of statics, refactor DateTime to TimeProvider, swap static
-  for injected dependency, convert static calls to use abstraction, replace statics
-  in a class, migrate one file to TimeProvider, scoped migration, update call sites.
+  USE FOR: replace DateTime.Now/UtcNow with TimeProvider, migrate static calls
+  to wrapper, bulk replace File.* with IFileSystem, codemod static to
+  injectable, add constructor injection for a dependency, mechanical or scoped
+  migration of statics, convert static calls to use an abstraction, update call
+  sites.
   DO NOT USE FOR: detecting statics (use detect-static-dependencies), generating
   wrappers (use generate-testability-wrappers), migrating between test frameworks.
 license: MIT
diff --git a/plugins/dotnet-test/skills/test-anti-patterns/SKILL.md b/plugins/dotnet-test/skills/test-anti-patterns/SKILL.md
index a3c90b6df6..e262a89b1d 100644
--- a/plugins/dotnet-test/skills/test-anti-patterns/SKILL.md
+++ b/plugins/dotnet-test/skills/test-anti-patterns/SKILL.md
@@ -5,18 +5,15 @@ description: >
   and quality issues — produces a severity-ranked report
   (Critical/Warning/Info). INVOKE whenever asked to audit or review tests,
   find what's wrong with a suite, judge whether tests are any good, or
-  check tests for: tests that pass but verify nothing, no/missing
-  assertions, swallowed exceptions, self-comparing/self-referential/
-  tautological assertions (output==input on round-trip/identity ops),
-  coverage-touching tests (every method called but nothing verified),
-  broad exceptions, flaky or order-dependent tests (Thread.Sleep,
-  DateTime.Now, time.sleep, shared state, reflection coupling),
-  duplicated tests, magic values — in .NET, Python/pytest, TS/Jest, Java,
-  Go, Ruby or C++. DO NOT USE FOR: writing new tests (use
-  code-testing-agent); running tests (use run-tests); migration;
-  assertion-diversity metrics (use assertion-quality); coverage/CRAP
-  metrics (use coverage-analysis); the testsmells.org academic catalog
-  (use test-smell-detection).
+  check for: tests that pass but verify nothing, missing assertions,
+  swallowed exceptions, self-comparing / tautological assertions,
+  coverage-touching tests, broad exceptions, flaky or order-dependent tests
+  (Thread.Sleep, DateTime.Now, shared state), duplicated tests, or magic
+  values — in .NET, Python/pytest, TS/Jest, Java, Go, Ruby or C++. DO NOT
+  USE FOR: writing new tests (use code-testing-agent); running tests (use
+  run-tests); migration; assertion-diversity metrics (use assertion-quality);
+  coverage/CRAP metrics (use coverage-analysis); the testsmells.org academic
+  catalog (use test-smell-detection).
 license: MIT
 ---
 
diff --git a/plugins/dotnet-test/skills/test-tagging/SKILL.md b/plugins/dotnet-test/skills/test-tagging/SKILL.md
index 52ca46d407..e2e0af2717 100644
--- a/plugins/dotnet-test/skills/test-tagging/SKILL.md
+++ b/plugins/dotnet-test/skills/test-tagging/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: test-tagging
-description: "Analyzes test suites in any language and tags each test with a standardized set of traits (positive, negative, critical-path, boundary, smoke, regression, integration, performance, security). Use when the user wants to categorize, audit, or label tests with traits. Works with .NET (MSTest TestCategory / xUnit Trait / NUnit Category / TUnit Property), Python (pytest markers; unittest has no canonical tag syntax so report-only), TypeScript/JavaScript (Jest/Vitest test names, describe-block conventions), Java (JUnit 5 @Tag / TestNG groups), Go (subtest naming / build tags / file _test.go), Ruby (RSpec metadata), Rust (cargo test naming / cfg attributes), Swift (XCTest test plans / Swift Testing @Tag), Kotlin (JUnit @Tag / Kotest tags), PowerShell (Pester -Tag), C++ (GoogleTest filter prefixes / Catch2 [tags] / doctest decorators). Auto-edits when the framework has canonical syntax; falls back to report-only otherwise. Do not use for writing new tests, running tests, or migrating frameworks."
+description: "Analyzes test suites in any language and tags each test with standardized traits (positive, negative, critical-path, boundary, smoke, regression, integration, performance, security). Use when the user wants to categorize, audit, or label tests with traits. Works across .NET (MSTest/xUnit/NUnit/TUnit), Python (pytest), TS/JS (Jest/Vitest), Java, Go, Ruby, Rust, Swift, Kotlin, PowerShell, and C++ — auto-editing when the framework has canonical tag syntax, otherwise report-only. Do not use for writing new tests, running tests, or migrating frameworks."
 license: MIT
 ---