From 773033d5688f7b610318d6a422f7c1d8123f2a3e Mon Sep 17 00:00:00 2001 From: xiaoxing0135 <706015750@qq.com> Date: Mon, 18 May 2026 21:59:49 +0800 Subject: [PATCH 01/19] =?UTF-8?q?feat:=20V1.37.0=20=E2=80=94=20Phase=202?= =?UTF-8?q?=20charter=20closure=20(3=20items)=20+=20HIGH/MEDIUM=20audit=20?= =?UTF-8?q?fixes=20(6=20items)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added (13 new files): - Bug tracker 5 adapters: jira/github/linear/webhook bug managers - Quality gate engine: quality_gate_engine.py + quality_gates.yaml - Layered requirements: 6 tiered requirement files (base/mobile/desktop/visual/system/ai/perf) - CI runtime compileall check Fixed HIGH (2): - H16: Expert count clarified (9含test-lead vs 8被协调) - H18: Skills README completed (13→32 business + 3 meta skills) Fixed MEDIUM (4): - M12: run_file BackgroundTasks unified - M14: RACI matrix 18 columns (pentest+automotive) - M15: requires_layer documented in CONTRIBUTING.md - M19: automotive-test checker reference fixed Changed: - Utils count: 67 → 73 (6 new .py files) - Version: 1.36.0 → 1.37.0 across VERSION/__init__/pyproject/package.json - Charter: Bug多适配 ✅ / 按需安装 ✅ / 门禁YAML ✅ --- .github/workflows/ci.yml | 1 + .pre-commit-config.yaml | 2 +- ...71\347\233\256\345\257\274\350\210\252.md" | 6 +- ...77\347\224\250\346\211\213\345\206\214.md" | 8 +- ...50\347\275\262\350\257\264\346\230\216.md" | 6 +- ...15\347\275\256\346\270\205\345\215\225.md" | 4 +- ...13\350\257\225\344\270\273\347\256\241.md" | 2 +- .../README.md" | 2 +- .../README.md" | 49 ++++ .../automotive-test.md" | 2 +- .../darwin-skill/scripts/screenshot.mjs" | 14 +- .../karpathy-guidelines/SKILL.md" | 1 + .../nuwa-skill/references/skill-template.md" | 1 - .../nuwa-skill/scripts/merge_research.py" | 5 +- .../nuwa-skill/scripts/quality_check.py" | 5 +- .../security-review.md" | 2 +- .../test-coordinator.md" | 2 +- .../llm-providers.md" | 2 +- .../quality_gates.yaml" | 30 ++ .../templates/INDEX.md" | 
2 +- .../README.md" | 8 +- .../bug_tracker_base.py" | 94 ++++++ .../github_bug_manager.py" | 149 ++++++++++ .../jira_bug_manager.py" | 163 +++++++++++ .../linear_bug_manager.py" | 168 +++++++++++ .../quality_gate_engine.py" | 272 ++++++++++++++++++ .../webhook_bug_manager.py" | 116 ++++++++ CHANGELOG.md | 131 ++++++++- CONTRIBUTING.md | 96 ++++--- FULL_GUIDE.md | 8 +- README.md | 4 +- README.zh-CN.md | 2 +- ROADMAP.md | 20 +- VERSION | 2 +- desktop/package.json | 2 +- docs/charter/01-vision-dimensions.md | 6 +- docs/charter/02-coverage-matrix.md | 6 +- docs/charter/04-skills-bugtracker.md | 8 +- docs/charter/05-install-deploy.md | 8 +- docs/charter/06-test-architecture.md | 8 +- docs/charter/07-runtime-license.md | 24 +- .../theory/13-build-your-own/byox-shell.zh.md | 2 +- examples/web-demo/README.md | 2 +- install.sh | 6 +- mobile/package.json | 2 +- requirements/README.md | 24 ++ requirements/ai.txt | 10 + requirements/base.txt | 64 +++++ requirements/desktop.txt | 8 + requirements/mobile.txt | 6 + requirements/perf.txt | 6 + requirements/system.txt | 13 + requirements/visual.txt | 11 + runtime/INDEX.md | 4 +- runtime/__init__.py | 6 +- runtime/api/endpoints/stream.py | 4 +- runtime/api/main.py | 16 +- runtime/api/result_store.py | 2 +- runtime/backends/docker.py | 2 +- runtime/backends/ssh.py | 4 +- runtime/compliance/engine.py | 5 - runtime/marketplace/catalog.py | 5 +- runtime/mcp/__init__.py | 2 +- runtime/observability/apm_export.py | 2 +- runtime/observability/prometheus_metrics.py | 13 +- runtime/orchestrator/INDEX.md | 4 +- .../adapters/perf_orchestrator.py | 3 +- runtime/orchestrator/agents/INDEX.md | 2 +- runtime/orchestrator/agents/__init__.py | 2 +- runtime/orchestrator/agents/base.py | 5 +- runtime/orchestrator/direct.py | 6 +- runtime/orchestrator/hooks.py | 8 +- runtime/orchestrator/release_readiness.py | 3 +- runtime/orchestrator/skills/__init__.py | 2 +- runtime/pyproject.toml | 2 +- runtime/router/llm_client.py | 6 +- runtime/subagent/pool.py | 
4 +- runtime/tests/test_cli_commands.py | 7 +- runtime/tests/test_impl_status_filter.py | 6 +- .../testcases_sample.xlsx" | Bin 9923 -> 9923 bytes 80 files changed, 1536 insertions(+), 184 deletions(-) create mode 100644 "04-\351\205\215\347\275\256\346\226\207\344\273\266/quality_gates.yaml" create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/bug_tracker_base.py" create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/github_bug_manager.py" create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/jira_bug_manager.py" create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/linear_bug_manager.py" create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/quality_gate_engine.py" create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/webhook_bug_manager.py" create mode 100644 requirements/README.md create mode 100644 requirements/ai.txt create mode 100644 requirements/base.txt create mode 100644 requirements/desktop.txt create mode 100644 requirements/mobile.txt create mode 100644 requirements/perf.txt create mode 100644 requirements/system.txt create mode 100644 requirements/visual.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index accfd4c..e612995 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,6 +33,7 @@ jobs: - name: 语法检查(py_compile) run: | python -m compileall 05-代码示例/ -q + python -m compileall runtime/ -q # ===== 2. 
依赖漏洞扫描 ===== security-audit: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ffac747..c6c6686 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,7 +65,7 @@ repos: # 文件统计校验(防误删 agent/skill/utils) - id: file-count-check name: Agents/Skills/Utils 数量校验 - entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "67" ] || { echo "❌ Utils 数量异常(期 67,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' + entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "73" ] || { echo "❌ Utils 数量异常(期 73,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' language: system pass_filenames: false always_run: true diff --git "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" index 542affa..e7c488b 100644 --- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" +++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" @@ -7,7 +7,7 @@ ## 零、V1.1.0 运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 49 脚本**保持不动**(宪章铁律),runtime 仅作可执行调度层。 +> 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 67 脚本**保持不动**(宪章铁律),runtime 仅作可执行调度层。 | 模块 | 路径 | 用途 | |------|------|------| @@ -264,10 +264,10 @@ - **32 业务 Skill + 3 元 Skill**(总 35 单元) - 业务:通用 8 + 平台专项 5 + 渗透 7 + 车载 5 + ECC 加固 6 + 探索 1 - 元(子目录):`darwin-skill/` `karpathy-guidelines/` `nuwa-skill/` -- **49 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) +- **73 
utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) - **20+ 协议**(HTTP/HTTPS/WS/gRPC/TCP/UDP/GraphQL/SOAP/Modbus/MQTT/SSH/串口/Kafka/RabbitMQ/Jaeger 等) - **9 PRD 格式**(md/txt/pdf/docx/xlsx/zip/img/html/url) -- **覆盖率约 95%**(剩 5% 为高度专业合规领域:航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508 等,业务方按需自加) +- **覆盖率 ~90%**(高度专业合规领域:航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508 等,业务方按需自加) --- diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" index 3ce8246..8144e6b 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -13,7 +13,7 @@ Test-Agent/ ─部署→ your-test-project/ ├── 02-专家定义/ ├── .claude/agents/ ├── 03-技能定义/ ├── .claude/skills/ ├── 04-配置文件/ ├── conftest.py / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ├── utils/(49 个 .py) +├── 05-代码示例/ ├── utils/(73 个 .py) └── 06-CICD集成/ ├── .github/workflows/test.yml + Jenkinsfile ├── src/ ← 你的被测系统源码 └── workspace/ ← 测试产出 @@ -57,7 +57,7 @@ bash Test-Agent/install.sh /path/to/your-test-project 2. clone 模板到临时目录 3. 装 Claude Code(如未装) 4. 创建项目目录结构(含 `测试计划/` 等 13 个 workflow 子目录) -5. 拷贝 16 expert + 32 skill + 49 utils + 配置 + CI/CD 文件 +5. 拷贝 16 expert + 32 skill + 73 utils + 配置 + CI/CD 文件 6. 创建 `.venv` + 装 requirements + playwright chromium 7. 复制 `.env.example` → `.env`(已存在则跳过) @@ -446,7 +446,7 @@ test-lead 编排:核心 8 + 上述 3 个分支并行 → 解析 PDF,提取文本(含表格) 2. 调 utils.prd_loader.suggest_agents(text) → 识别:Android+iOS+API+性能 - 3. 编排 14 个专家中实际需要的子集 + 3. 编排 16 个专家中实际需要的子集 → 跳过 visual-tester/system-tester/ai-tester(PRD 未涉及) 4. 
输出测试计划(IEEE 829)+ 启动执行链 ``` @@ -660,7 +660,7 @@ python -m utils.jmeter_result_parser \ ## V1.1.0 · 运行时 CLI/API 用法 -> 运行时层 `runtime/` 是 V1.1.0 新增。已有 16 专家 / 32 Skill / 49 脚本不动,本层仅作可执行调度。 +> 运行时层 `runtime/` 是 V1.1.0 新增。已有 16 专家 / 32 Skill / 67 脚本不动,本层仅作可执行调度。 ### CLI 命令速查 diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" index 5ff6be8..2341a4a 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" @@ -149,7 +149,7 @@ if (-not (Test-Path "$PROJECT_ROOT\.env")) { Copy-Item "$TEMPLATE_DIR\04-配置文件\.env.example" "$PROJECT_ROOT\.env" -Force } -# 拷贝 utils(49 个 .py + __init__.py) +# 拷贝 utils(73 个 .py + __init__.py) $utils = @( "__init__.py", "api_retry_util.py", "data_factory.py", "data_masking.py", "excel_generator.py", "flaky_detector.py", @@ -273,7 +273,7 @@ TEMPLATE_DIR="${TEMPLATE_DIR:-$(pwd)}" PROJECT_ROOT="${1:-$(pwd)/test-project}" echo "==========================================" -echo " Test-Agent 工作流部署 V1.32.5" +echo " Test-Agent 工作流部署 V1.36.0" echo " 模板目录: $TEMPLATE_DIR" echo " 项目目录: $PROJECT_ROOT" echo "==========================================" @@ -638,7 +638,7 @@ python -c "from utils.api_retry_util import call_with_retry; print('utils OK')" ## V1.1.0 · 运行时层 `runtime/` 部署 -> 运行时层是可选的(用户也可只用 16 专家+32 Skill+49 脚本的 V1.0.0 工作流模式)。 +> 运行时层是可选的(用户也可只用 16 专家+32 Skill+67 脚本的 V1.0.0 工作流模式)。 > 想要 HTTP/CLI 一键跑、AI 路由、飞轮存储,启它。 ### 1. 
起本地依赖(Docker) diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" index 5b618ac..9b22744 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" @@ -14,7 +14,7 @@ | `pytest.ini` | pytest 行为(markers / addopts / timeout) | ✅(部署默认值即可) | | `conftest.py` | pytest 全局 fixture(项目根唯一) | ✅(部署默认值即可) | | `requirements.txt` | Python 依赖 | ✅ | -| `.claude/agents/*.md` | 14 个 Agent 定义(核心 9 + 平台扩展 5) | ✅ | +| `.claude/agents/*.md` | 16 个 Agent 定义(核心 9 + 平台扩展 5 + 垂直领域 2) | ✅ | | `.claude/skills/*.md` | 32 个 Skill 定义(通用 8 + 平台 5 + 渗透 7 + 车载 5 + ECC 6 + 探索 1) | ✅ | | `workspace/regression_modules.yaml` | 回归范围模块映射 | ⚪ 可选 | | `workspace/执行日志/baselines/perf_baseline.json` | 性能基线 | ⚪ 自动生成(首次 release 跑 full 后) | @@ -311,7 +311,7 @@ test -f conftest.py && echo "✅ conftest.py" || echo "❌" test -f pytest.ini && echo "✅ pytest.ini" || echo "❌" # 4. utils 完整性 -ls utils/*.py | wc -l # 应该 49 个 +ls utils/*.py | wc -l # 应该 67 个 # 5. 
agent / skill ls .claude/agents/*.md | wc -l # 14 个 diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" "b/02-\344\270\223\345\256\266\345\256\232\344\271\211/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" index 4cf6ede..e384963 100644 --- "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" +++ "b/02-\344\270\223\345\256\266\345\256\232\344\271\211/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" @@ -5,7 +5,7 @@ tools: Read, Write, Bash, Grep, Glob EXPERT_IMPL_STATUS: production --- -> ℹ️ **V1.32.5 实装状态**: 16 expert 全部 **16 active**(11 production + 5 script-backed),**0 rollout(V1.x rollout 收尾)**。详见 [ROADMAP.md](../ROADMAP.md)。 +> ℹ️ **V1.36.0 实装状态**: 16 expert 全部 **16 active**(11 production + 5 script-backed),**0 rollout(V1.x rollout 收尾)**。详见 [ROADMAP.md](../ROADMAP.md)。 > runtime/router + orchestrator 防 mock 已落地 — 路由到未实装 expert 返回明确「未实装」说明,不输出 mock 数据。 你是一位拥有15年经验的测试技术总监,带领过多个大型互联网项目的测试团队。你深谙测试工程化,善于风险识别、资源调度和质量决策。 diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/README.md" "b/02-\344\270\223\345\256\266\345\256\232\344\271\211/README.md" index 3f8224f..b3a70b4 100644 --- "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/README.md" +++ "b/02-\344\270\223\345\256\266\345\256\232\344\271\211/README.md" @@ -1,6 +1,6 @@ # 02-专家定义 索引 -**16 个 Agent**,按域分三类:核心通用流程 9 + 平台专项扩展 5 + 垂直领域 2;按实装状态:**11 production + 5 script + 0 rollout**(详见根目录 [ROADMAP.md](../ROADMAP.md))。 +**16 个 Agent**,按域分三类:核心通用流程 9 (含 test-lead 协调者) + 平台专项扩展 5 + 垂直领域 2。test-lead 自身不计入被协调专家清单时核心为 8 位。按实装状态:**11 production + 5 script + 0 rollout**(详见根目录 [ROADMAP.md](../ROADMAP.md))。 顶层导航见 [00-项目导航.md](../00-项目导航.md)。 diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/README.md" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/README.md" index e081ede..834be5e 100644 --- 
"a/03-\346\212\200\350\203\275\345\256\232\344\271\211/README.md" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/README.md" @@ -36,6 +36,55 @@ --- +--- +## 类别 3:渗透安全 7 Skill + +| Skill | 文件 | 用途 | 触发示例 | +|-------|-----|------|---------| +| `/pentest-coordinator` | `pentest-coordinator.md` | 渗透测试总协调(自动路由子 skill) | 安全测试启动 | +| `/pentest-recon` | `pentest-recon.md` | 信息收集与资产侦察 | 渗透前信息收集 | +| `/pentest-vuln` | `pentest-vuln.md` | 漏洞扫描与验证 | 自动化漏洞检测 | +| `/pentest-exploit` | `pentest-exploit.md` | 漏洞利用与 PoC 验证 | 漏洞复现 | +| `/pentest-web` | `pentest-web.md` | Web 应用渗透(OWASP Top 10) | Web 安全测试 | +| `/pentest-api` | `pentest-api.md` | API 渗透测试(JWT/OAuth/GraphQL) | API 安全测试 | +| `/pentest-report` | `pentest-report.md` | 渗透测试报告生成 | 安全评估输出 | + +## 类别 4:车载 5 Skill + +| Skill | 文件 | 用途 | 触发示例 | +|-------|-----|------|---------| +| `/automotive-test` | `automotive-test.md` | 车载测试总协调 | 车载系统测试 | +| `/automotive-can-bus-test` | `automotive-can-bus-test.md` | CAN 总线协议测试 | CAN 报文验证 | +| `/automotive-adas-scenario` | `automotive-adas-scenario.md` | ADAS 场景测试 | 辅助驾驶验证 | +| `/automotive-hil-loop-test` | `automotive-hil-loop-test.md` | HIL 硬件在环测试 | 硬件在环验证 | +| `/automotive-ota-update-test` | `automotive-ota-update-test.md` | OTA 升级测试 | 远程升级验证 | + +## 类别 5:ECC 测试加固 6 Skill + +| Skill | 文件 | 用途 | 触发示例 | +|-------|-----|------|---------| +| `/tdd-workflow` | `tdd-workflow.md` | 测试驱动开发工作流 | 新功能开发 | +| `/e2e-testing` | `e2e-testing.md` | 端到端测试(Playwright) | 关键用户流程 | +| `/verification-loop` | `verification-loop.md` | 验证循环(自检+修复) | 持续质量检查 | +| `/eval-harness` | `eval-harness.md` | 评估框架(LLM-as-judge) | AI 输出质量评估 | +| `/security-review` | `security-review.md` | 安全代码审查 | 代码提交前安全检查 | +| `/agent-introspection-debugging` | `agent-introspection-debugging.md` | Agent 自省调试 | Agent 行为异常排查 | + +## 类别 6:探索学习 1 Skill + +| Skill | 文件 | 用途 | 触发示例 | +|-------|-----|------|---------| +| `/build-your-own-x-explorer` | `build-your-own-x-explorer.md` | 探索式学习(BYO-X 框架) | 新技术评估 / 实验 | + +## 元 Skill 3 
个(子目录) + +| 元 Skill | 目录 | 用途 | +|----------|------|------| +| `darwin-skill` | `darwin-skill/` | Skill 自进化棘轮优化 | +| `karpathy-guidelines` | `karpathy-guidelines/` | Karpathy 编码纪律注入 | +| `nuwa-skill` | `nuwa-skill/` | 女娲:人物思维框架蒸馏 | + +--- ## 每个 Skill 文件结构 每个 skill 文件统一包含以下章节: diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-test.md" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-test.md" index 163a04e..7855f8f 100644 --- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-test.md" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-test.md" @@ -26,7 +26,7 @@ SKILL_IMPL_STATUS: production | 6 总线 CAN/LIN/FlexRay/Eth | `/automotive-can-bus-test` | | 7 ADAS 场景 | `/automotive-adas-scenario` | | 8 OTA 升级 | `/automotive-ota-update-test` | -| 9 合规审计 | `mcp-compliance-checker iso-26262/sotif/r155/r156` | +| 9 合规审计 | `compliance/engine.py` + 行业规则库(ISO 26262/SOTIF/R155/R156 Phase 2) | | 10 报告 + Bug 单 | `report-generator` | ## 主宪章铁律 diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/scripts/screenshot.mjs" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/scripts/screenshot.mjs" index 5f28153..bf34c20 100644 --- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/scripts/screenshot.mjs" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/scripts/screenshot.mjs" @@ -14,8 +14,18 @@ import { createRequire } from 'module'; const require = createRequire(import.meta.url); -// 使用全局安装的 playwright-core -const pw = require('/Users/alchain/.npm-global/lib/node_modules/playwright/node_modules/playwright-core'); +// Resolve playwright-core from the local project or global installation +let pw; +try { + pw = require(require.resolve('playwright-core', { paths: [process.cwd(), ...module.paths] })); +} catch { + try { + pw = require('playwright-core'); + } catch { + console.error('playwright-core not found. 
Install with: npm install playwright'); + process.exit(1); + } +} const htmlPath = process.argv[2] || new URL('../templates/result-card.html', import.meta.url).pathname; const outputPath = process.argv[3] || new URL('../templates/result-card.png', import.meta.url).pathname; diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/karpathy-guidelines/SKILL.md" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/karpathy-guidelines/SKILL.md" index 6a62d04..d64adb0 100644 --- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/karpathy-guidelines/SKILL.md" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/karpathy-guidelines/SKILL.md" @@ -2,6 +2,7 @@ name: karpathy-guidelines description: Behavioral guidelines to reduce common LLM coding mistakes. Use when writing, reviewing, or refactoring code to avoid overcomplication, make surgical changes, surface assumptions, and define verifiable success criteria. license: MIT +SKILL_IMPL_STATUS: production --- # Karpathy Guidelines diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/references/skill-template.md" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/references/skill-template.md" index b3ffa3a..ef207ab 100644 --- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/references/skill-template.md" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/references/skill-template.md" @@ -111,5 +111,4 @@ description: | --- > 本Skill由 [女娲 · Skill造人术](https://github.com/alchaincyf/nuwa-skill) 生成 -> 创建者:[花叔](https://x.com/AlchainHust) ``` diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/scripts/merge_research.py" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/scripts/merge_research.py" index 6e51171..bc42c41 100644 --- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/scripts/merge_research.py" +++ 
"b/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/scripts/merge_research.py" @@ -98,7 +98,10 @@ def main(): rows.append(f"│ {label:<12} │ {'❌ 缺失':<8} │ {'—':<24} │") continue - content = md_file.read_text(encoding='utf-8') + try: + content = md_file.read_text(encoding='utf-8') + except UnicodeDecodeError: + content = md_file.read_text(encoding='gbk') files[key] = content stats = count_sources(content) findings = extract_key_findings(content) diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/scripts/quality_check.py" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/scripts/quality_check.py" index e84d9ed..18d84de 100644 --- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/scripts/quality_check.py" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/nuwa-skill/scripts/quality_check.py" @@ -111,7 +111,10 @@ def main(): print(f"❌ 文件不存在: {skill_path}") sys.exit(1) - content = skill_path.read_text(encoding='utf-8') + try: + content = skill_path.read_text(encoding='utf-8') + except UnicodeDecodeError: + content = skill_path.read_text(encoding='gbk') checks = [ ("心智模型数量", check_mental_models), diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/security-review.md" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/security-review.md" index 4c4be56..b19b0b0 100644 --- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/security-review.md" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/security-review.md" @@ -36,7 +36,7 @@ SKILL_IMPL_STATUS: script ## 工具(本项目已有) -- `utils/security_scanner.py`(已有 49 代码示例之一) +- `utils/security_scanner.py`(已有 67 代码示例之一) - `bandit`(Python SAST) - `gitleaks`(已在 pre-commit) - `pip-audit` + `safety` diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/test-coordinator.md" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/test-coordinator.md" index 1acc3d8..74755a9 100644 --- 
"a/03-\346\212\200\350\203\275\345\256\232\344\271\211/test-coordinator.md" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/test-coordinator.md" @@ -200,7 +200,7 @@ else THREADS=5; RAMPUP=10; DURATION=60 fi -# TARGET_HOST/PROTOCOL/PORT 由 conftest 或 .env 解析(不含协议前�缀) +# TARGET_HOST/PROTOCOL/PORT 由 conftest 或 .env 解析(不含协议前缀) jmeter -n \ -t workspace/自动化脚本/jmeter/test_plan.jmx \ -l workspace/执行日志/jmeter-results/result.jtl \ diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/llm-providers.md" "b/04-\351\205\215\347\275\256\346\226\207\344\273\266/llm-providers.md" index 013f24d..c774120 100644 --- "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/llm-providers.md" +++ "b/04-\351\205\215\347\275\256\346\226\207\344\273\266/llm-providers.md" @@ -11,7 +11,7 @@ 1. 选厂商 (路径 A 内置 6 / 路径 B 兼容 5+) 2. 复制对应 export 3. `tagent demo` 验路由 -- **实测有效** (V1.32.5): Claude / OpenAI / Gemini / DeepSeek / Qwen / Ollama / **智谱 GLM** (路径 B 实测 20/20 准, 见 PR #79) +- **实测有效** (V1.36.0): Claude / OpenAI / Gemini / DeepSeek / Qwen / Ollama / **智谱 GLM** (路径 B 实测 20/20 准, 见 PR #79) - **适用场景**: - 离线本地 = Ollama / Qwen - 国内合规 = 智谱 / 豆包 / 通义 diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/quality_gates.yaml" "b/04-\351\205\215\347\275\256\346\226\207\344\273\266/quality_gates.yaml" new file mode 100644 index 0000000..82e310f --- /dev/null +++ "b/04-\351\205\215\347\275\256\346\226\207\344\273\266/quality_gates.yaml" @@ -0,0 +1,30 @@ +# Quality Gates Configuration (YAML-driven) +# 门禁引擎 quality_gate_engine.py 读取此文件。 +# 用户修改此文件即可调整门禁阈值,无需改代码。 + +smoke: + min_pass_rate_pct: 95 + +regression: + min_pass_rate_pct: 90 + min_coverage_pct: 80 + max_flaky_pct: 5 + +performance_ci_quick: + min_tps: 20 + max_p95_ms: 800 + max_avg_ms: 400 + max_error_pct: 1.0 + +performance_full: + min_tps: 100 + max_p95_ms: 500 + max_avg_ms: 200 + max_error_pct: 1.0 + max_baseline_regression_pct: 20 + +release: + require_smoke: true + require_regression: true + require_perf_full: 
false + require_bug_review: true diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/INDEX.md" "b/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/INDEX.md" index 4bf85d9..d5aa7af 100644 --- "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/INDEX.md" +++ "b/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/INDEX.md" @@ -1,4 +1,4 @@ -# 配置模板库索引(V1.32.5) +# 配置模板库索引(V1.36.0) > `tagent init` 交互向导从本目录读取模板 + matrix · 5 分钟生成 `.env` + `tagent.yml` + `STARTUP.md`。 diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/README.md" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/README.md" index 2868b6a..307569d 100644 --- "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/README.md" +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/README.md" @@ -1,6 +1,6 @@ # 05-代码示例(utils/)索引 -49 个 Python 工具模块(含 `__init__.py`),按职责多分类(核心 / 平台 / 协议 / 非功能 / 用例方法 / 测试类型 / 安全增强 / DB/契约/API / 移动专项 / a11y/i18n / 度量 / 区块链/AI 对抗 / 输入)。 +73 个 Python 工具模块(含 `__init__.py`),按职责多分类(核心 / 平台 / 协议 / 非功能 / 用例方法 / 测试类型 / 安全增强 / DB/契约/API / 移动专项 / a11y/i18n / 度量 / 区块链/AI 对抗 / 输入)。 > 顶层导航见根目录 `00-项目导航.md`。 > import 路径权威:`from utils. 
import ...`(部署后 utils/ 在项目根,conftest.py 已注入 sys.path)。 @@ -22,6 +22,12 @@ | `regression_scope.py` | git diff 影响范围分析(YAML 配置) | `analyze_change_impact(base_branch)` | | `zentao_bug_manager.py` | BugTracker 默认 adapter:禅道 SDK + token 续期(其他 adapter 同契约 BugTrackerBase,主宪章 §12) | `ZentaoBugManager.create_bug` / `batch_submit_from_failures` | | `ci_quality_gate.py` | CI 门禁统一(junit + cov) | `parse_junit` / `check_smoke` / `check_regression` / `check_coverage` | +| `quality_gate_engine.py` | YAML 驱动门禁引擎(替代硬编码阈值) | `QualityGateEngine` / `check_smoke/regression/coverage/performance/release` | +| `bug_tracker_base.py` | BugTracker 抽象基类 + 工厂模式(5 适配器注册) | `BugTrackerBase` / `create_bug_manager` / `TRACKER_REGISTRY` | +| `jira_bug_manager.py` | Jira REST API 适配器 | `JiraBugManager.submit_bug/query_open_bugs` | +| `github_bug_manager.py` | GitHub Issues API 适配器 | `GitHubBugManager.submit_bug/query_open_bugs` | +| `linear_bug_manager.py` | Linear GraphQL API 适配器 | `LinearBugManager.submit_bug/query_open_bugs` | +| `webhook_bug_manager.py` | 通用 Webhook 推送适配器(企微/飞书/钉钉/Slack 回调) | `WebhookBugManager.submit_bug` | --- diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/bug_tracker_base.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/bug_tracker_base.py" new file mode 100644 index 0000000..57b70a3 --- /dev/null +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/bug_tracker_base.py" @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: MIT +"""BugTracker abstract base — unified contract for all tracker adapters. + +Currently implemented: Zentao (zentao_bug_manager.py). +Phase 2: Jira, GitHub Issues, Linear, Webhook — all implemented. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any + + +class BugTrackerBase(ABC): + """Every tracker adapter MUST implement these 5 methods. 
+ + Severity mapping (cross-tracker canonical): + 1 = P0 / Highest / Critical + 2 = P1 / High + 3 = P2 / Medium + 4 = P3 / Low + """ + + @abstractmethod + def submit_bug(self, title: str, description: str, severity: int, + attachments: list[str] | None = None, + reproduce_steps: str = "") -> str: + """Create a bug. Returns bug_id.""" + ... + + @abstractmethod + def get_status(self, bug_id: str) -> dict[str, Any]: + """Return {status, assignee, severity, last_updated}.""" + ... + + @abstractmethod + def add_comment(self, bug_id: str, comment: str, + attachments: list[str] | None = None) -> None: + ... + + @abstractmethod + def link_testcase(self, bug_id: str, testcase_id: str) -> None: + ... + + @abstractmethod + def query_open_bugs(self, filters: dict[str, Any] | None = None) -> list[dict[str, Any]]: + ... + + +TRACKER_REGISTRY: dict[str, type[BugTrackerBase]] = {} + +try: + from zentao_bug_manager import ZentaoBugManager # noqa: F811 + TRACKER_REGISTRY["zentao"] = ZentaoBugManager +except ImportError: + pass + +try: + from jira_bug_manager import JiraBugManager # noqa: F811 + TRACKER_REGISTRY["jira"] = JiraBugManager +except ImportError: + pass + +try: + from github_bug_manager import GitHubBugManager # noqa: F811 + TRACKER_REGISTRY["github"] = GitHubBugManager +except ImportError: + pass + +try: + from linear_bug_manager import LinearBugManager # noqa: F811 + TRACKER_REGISTRY["linear"] = LinearBugManager +except ImportError: + pass + +try: + from webhook_bug_manager import WebhookBugManager # noqa: F811 + TRACKER_REGISTRY["webhook"] = WebhookBugManager +except ImportError: + pass + + +def create_bug_manager(tracker: str = "", **kwargs: Any) -> BugTrackerBase | None: + """Factory: return a BugTrackerBase adapter for the given tracker name. + + Falls back to BUG_TRACKER env var. Returns None if tracker unavailable. 
+ """ + import os + + name = tracker or os.getenv("BUG_TRACKER", "zentao") + cls = TRACKER_REGISTRY.get(name) + if cls is None: + return None + return cls(**kwargs) diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/github_bug_manager.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/github_bug_manager.py" new file mode 100644 index 0000000..2337723 --- /dev/null +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/github_bug_manager.py" @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: MIT +"""GitHub Issues Bug 管理客户端 — BugTrackerBase 适配器。 + +环境变量: + GITHUB_TOKEN — GitHub Personal Access Token + GITHUB_REPO — owner/repo (如 Wool-xing/Test-Agent) +""" + +from __future__ import annotations + +import logging +import os +from typing import Any + +import requests +from dotenv import load_dotenv + +from bug_tracker_base import BugTrackerBase, TRACKER_REGISTRY + +load_dotenv() +logger = logging.getLogger(__name__) + +LABEL_BY_SEVERITY: dict[int, str] = { + 1: "P0-critical", + 2: "P1-high", + 3: "P2-medium", + 4: "P3-low", +} + + +class GitHubBugManager(BugTrackerBase): + def __init__( + self, + token: str | None = None, + repo: str | None = None, + ): + self.token = token or os.getenv("GITHUB_TOKEN", "") + self.repo = repo or os.getenv("GITHUB_REPO", "") + self.session = requests.Session() + self.session.headers.update({ + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {self.token}", + "X-GitHub-Api-Version": "2022-11-28", + }) + if not self.token: + raise ValueError("GITHUB_TOKEN 未配置") + if not self.repo: + raise ValueError("GITHUB_REPO 未配置 (格式: owner/repo)") + + @property + def _api_base(self) -> str: + return f"https://api.github.com/repos/{self.repo}" + + def _request( + self, method: str, path: str, **kwargs: Any + ) -> dict[str, Any] | list[dict[str, Any]]: + url = ( + path if path.startswith("https://") else f"{self._api_base}{path}" + ) + resp = self.session.request(method, url, **kwargs) + resp.raise_for_status() + 
return resp.json() if resp.text else {} + + def submit_bug( + self, + title: str, + description: str, + severity: int, + attachments: list[str] | None = None, + reproduce_steps: str = "", + ) -> str: + body = description + if reproduce_steps: + body += f"\n\n## 复现步骤\n{reproduce_steps}" + label = LABEL_BY_SEVERITY.get(severity, "P2-medium") + payload: dict[str, Any] = { + "title": title, + "body": body, + "labels": ["bug", label], + } + result = self._request("POST", "/issues", json=payload) + if isinstance(result, list): + result = result[0] if result else {} + number = result.get("number", 0) + logger.info("GitHub Issue 已创建: #%s — %s", number, title) + return str(number) + + def get_status(self, bug_id: str) -> dict[str, Any]: + result = self._request("GET", f"/issues/{bug_id}") + if isinstance(result, list): + result = result[0] if result else {} + labels = [lb.get("name", "") for lb in result.get("labels", [])] + sev = 3 + for lb in labels: + if lb.startswith("P") and "-" in lb: + try: + sev = int(lb[1]) + except ValueError: + pass + return { + "status": result.get("state", "unknown"), + "assignee": ( + (result.get("assignee", {}) or {}).get("login", "") + ), + "severity": sev, + "last_updated": result.get("updated_at", ""), + } + + def add_comment( + self, bug_id: str, comment: str, attachments: list[str] | None = None + ) -> None: + self._request("POST", f"/issues/{bug_id}/comments", json={"body": comment}) + logger.info("GitHub Issue #%s: 已添加评论", bug_id) + + def link_testcase(self, bug_id: str, testcase_id: str) -> None: + body = f"关联测试用例: `{testcase_id}`" + self._request("POST", f"/issues/{bug_id}/comments", json={"body": body}) + + def query_open_bugs( + self, filters: dict[str, Any] | None = None + ) -> list[dict[str, Any]]: + params: dict[str, Any] = {"state": "open", "labels": "bug", "per_page": 100} + if filters and "severity" in filters: + params["labels"] = f"bug,{LABEL_BY_SEVERITY.get(filters['severity'], '')}" + if filters and "assignee" in filters: 
+ params["assignee"] = filters["assignee"] + result = self._request("GET", "/issues", params=params) + if not isinstance(result, list): + result = [] + issues: list[dict[str, Any]] = [] + for issue in result: + labels = [lb.get("name", "") for lb in issue.get("labels", [])] + sev = 3 + for lb in labels: + if lb.startswith("P") and "-" in lb: + try: + sev = int(lb[1]) + except ValueError: + pass + issues.append({ + "bug_id": str(issue.get("number", "")), + "title": issue.get("title", ""), + "status": issue.get("state", ""), + "severity": sev, + }) + return issues + + +TRACKER_REGISTRY["github"] = GitHubBugManager diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/jira_bug_manager.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/jira_bug_manager.py" new file mode 100644 index 0000000..d1f55d9 --- /dev/null +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/jira_bug_manager.py" @@ -0,0 +1,163 @@ +# SPDX-License-Identifier: MIT +"""Jira Bug 管理客户端 — BugTrackerBase 适配器。 + +环境变量: + JIRA_URL — Jira 实例 URL (https://your-domain.atlassian.net) + JIRA_EMAIL — 登录邮箱 + JIRA_API_TOKEN — Atlassian API Token + JIRA_PROJECT — 默认项目 KEY (如 PROJ) +""" + +from __future__ import annotations + +import logging +import os +from typing import Any + +import requests +from dotenv import load_dotenv + +from bug_tracker_base import BugTrackerBase, TRACKER_REGISTRY + +load_dotenv() +logger = logging.getLogger(__name__) + +SEVERITY_MAP: dict[int, str] = { + 1: "Highest", + 2: "High", + 3: "Medium", + 4: "Low", +} + + +class JiraBugManager(BugTrackerBase): + def __init__( + self, + url: str | None = None, + email: str | None = None, + api_token: str | None = None, + project: str | None = None, + ): + self.url = (url or os.getenv("JIRA_URL", "")).rstrip("/") + self.email = email or os.getenv("JIRA_EMAIL", "") + self.api_token = api_token or os.getenv("JIRA_API_TOKEN", "") + self.project = project or os.getenv("JIRA_PROJECT", "") + self.session = requests.Session() + 
self.session.headers.update({"Accept": "application/json"}) + self.session.auth = (self.email, self.api_token) + + if not self.url: + raise ValueError("JIRA_URL 未配置") + if not self.project: + raise ValueError("JIRA_PROJECT 未配置") + + def _request(self, method: str, path: str, **kwargs: Any) -> dict[str, Any]: + resp = self.session.request(method, f"{self.url}/rest/api/3{path}", **kwargs) + resp.raise_for_status() + return resp.json() if resp.text else {} + + def submit_bug( + self, + title: str, + description: str, + severity: int, + attachments: list[str] | None = None, + reproduce_steps: str = "", + ) -> str: + body = _build_description(description, reproduce_steps) + payload: dict[str, Any] = { + "fields": { + "project": {"key": self.project}, + "summary": title, + "description": { + "type": "doc", + "version": 1, + "content": [ + { + "type": "paragraph", + "content": [{"type": "text", "text": body}], + } + ], + }, + "issuetype": {"name": "Bug"}, + "priority": {"name": SEVERITY_MAP.get(severity, "Medium")}, + } + } + result = self._request("POST", "/issue", json=payload) + key: str = result.get("key", "") + logger.info("Jira Bug 已创建: %s — %s", key, title) + return key + + def get_status(self, bug_id: str) -> dict[str, Any]: + result = self._request("GET", f"/issue/{bug_id}") + fields = result.get("fields", {}) + return { + "status": fields.get("status", {}).get("name", "unknown"), + "assignee": ( + fields.get("assignee", {}) or {} + ).get("displayName", ""), + "severity": int( + _reverse_priority( + (fields.get("priority", {}) or {}).get("name", "Medium") + ) + ), + "last_updated": fields.get("updated", ""), + } + + def add_comment( + self, bug_id: str, comment: str, attachments: list[str] | None = None + ) -> None: + self._request("POST", f"/issue/{bug_id}/comment", json={"body": comment}) + logger.info("Jira Bug %s: 已添加评论", bug_id) + + def link_testcase(self, bug_id: str, testcase_id: str) -> None: + self._request( + "POST", + f"/issue/{bug_id}/remotelink", + 
json={ + "object": { + "url": f"testcase://{testcase_id}", + "title": f"TestCase {testcase_id}", + } + }, + ) + + def query_open_bugs( + self, filters: dict[str, Any] | None = None + ) -> list[dict[str, Any]]: + jql = f"project = {self.project} AND status != Closed" + if filters: + if "severity" in filters: + name = SEVERITY_MAP.get(filters["severity"], "") + if name: + jql += f' AND priority = "{name}"' + if "assignee" in filters: + jql += f' AND assignee = "{filters["assignee"]}"' + result = self._request("GET", "/search", params={"jql": jql, "maxResults": 100}) + issues: list[dict[str, Any]] = [] + for issue in result.get("issues", []): + f = issue.get("fields", {}) + issues.append({ + "bug_id": issue.get("key", ""), + "title": f.get("summary", ""), + "status": (f.get("status", {}) or {}).get("name", ""), + "severity": _reverse_priority( + (f.get("priority", {}) or {}).get("name", "Medium") + ), + }) + return issues + + +def _build_description(description: str, reproduce_steps: str) -> str: + parts = [description] + if reproduce_steps: + parts.append(f"\n复现步骤:\n{reproduce_steps}") + return "\n".join(parts) + + +def _reverse_priority(name: str) -> int: + mapping = {"Highest": 1, "High": 2, "Medium": 3, "Low": 4} + return mapping.get(name, 3) + + +TRACKER_REGISTRY["jira"] = JiraBugManager diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/linear_bug_manager.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/linear_bug_manager.py" new file mode 100644 index 0000000..1345a70 --- /dev/null +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/linear_bug_manager.py" @@ -0,0 +1,168 @@ +# SPDX-License-Identifier: MIT +"""Linear Bug 管理客户端 — BugTrackerBase 适配器。 + +Linear GraphQL API (https://api.linear.app/graphql). 
+环境变量: + LINEAR_API_KEY — Linear Personal API Key + LINEAR_TEAM_ID — 默认团队 ID (如 TEAM-abc123) +""" + +from __future__ import annotations + +import logging +import os +from typing import Any + +import requests +from dotenv import load_dotenv + +from bug_tracker_base import BugTrackerBase, TRACKER_REGISTRY + +load_dotenv() +logger = logging.getLogger(__name__) + +SEVERITY_PRIORITY: dict[int, int] = { + 1: 1, # Urgent + 2: 2, # High + 3: 3, # Medium + 4: 4, # Low +} + + +class LinearBugManager(BugTrackerBase): + def __init__( + self, + api_key: str | None = None, + team_id: str | None = None, + ): + self.api_key = api_key or os.getenv("LINEAR_API_KEY", "") + self.team_id = team_id or os.getenv("LINEAR_TEAM_ID", "") + if not self.api_key: + raise ValueError("LINEAR_API_KEY 未配置") + + def _gql(self, query: str, variables: dict[str, Any] | None = None) -> dict[str, Any]: + resp = requests.post( + "https://api.linear.app/graphql", + json={"query": query, "variables": variables or {}}, + headers={"Authorization": self.api_key, "Content-Type": "application/json"}, + timeout=30, + ) + resp.raise_for_status() + body = resp.json() + if "errors" in body: + raise RuntimeError(f"Linear GraphQL 错误: {body['errors']}") + return body.get("data", {}) + + def submit_bug( + self, + title: str, + description: str, + severity: int, + attachments: list[str] | None = None, + reproduce_steps: str = "", + ) -> str: + body = description + if reproduce_steps: + body += f"\n\n复现步骤:\n{reproduce_steps}" + priority = SEVERITY_PRIORITY.get(severity, 3) + + mutation = """ + mutation IssueCreate($input: IssueCreateInput!) 
{ + issueCreate(input: $input) { + success + issue { id identifier title } + } + } + """ + variables: dict[str, Any] = { + "input": { + "title": title, + "description": body, + "priority": priority, + "teamId": self.team_id, + } + } + data = self._gql(mutation, variables) + issue_create = data.get("issueCreate", {}) + identifier = ( + issue_create.get("issue", {}).get("identifier", "") + if issue_create.get("success") + else "" + ) + logger.info("Linear Issue 已创建: %s — %s", identifier, title) + return identifier + + def get_status(self, bug_id: str) -> dict[str, Any]: + query = """ + query($id: String!) { + issue(id: $id) { + state { name } + assignee { name } + priority + updatedAt + } + } + """ + data = self._gql(query, {"id": bug_id}) + issue = data.get("issue", {}) or {} + state = issue.get("state", {}) or {} + return { + "status": state.get("name", "unknown"), + "assignee": (issue.get("assignee", {}) or {}).get("name", ""), + "severity": issue.get("priority", 3), + "last_updated": issue.get("updatedAt", ""), + } + + def add_comment( + self, bug_id: str, comment: str, attachments: list[str] | None = None + ) -> None: + mutation = """ + mutation($input: CommentCreateInput!) { + commentCreate(input: $input) { success } + } + """ + self._gql(mutation, {"input": {"issueId": bug_id, "body": comment}}) + logger.info("Linear Issue %s: 已添加评论", bug_id) + + def link_testcase(self, bug_id: str, testcase_id: str) -> None: + self.add_comment(bug_id, f"关联测试用例: `{testcase_id}`") + + def query_open_bugs( + self, filters: dict[str, Any] | None = None + ) -> list[dict[str, Any]]: + filter_clause = 'state { name { neq: "Done" } }' + if filters: + if "assignee" in filters: + filter_clause += ( + f', assignee {{ name {{ eq: "{filters["assignee"]}" }} }}' + ) + query = """ + query($filter: IssueFilter!) 
{ + issues(filter: $filter, first: 100) { + nodes { + id identifier title + state { name } + priority + assignee { name } + } + } + } + """ + data = self._gql(query, { + "filter": { + "team": {"id": {"eq": self.team_id}} if self.team_id else None, + } + }) + issues: list[dict[str, Any]] = [] + nodes = data.get("issues", {}).get("nodes", []) + for node in nodes: + issues.append({ + "bug_id": node.get("identifier", node.get("id", "")), + "title": node.get("title", ""), + "status": (node.get("state", {}) or {}).get("name", ""), + "severity": node.get("priority", 3), + }) + return issues + + +TRACKER_REGISTRY["linear"] = LinearBugManager diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/quality_gate_engine.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/quality_gate_engine.py" new file mode 100644 index 0000000..81696a0 --- /dev/null +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/quality_gate_engine.py" @@ -0,0 +1,272 @@ +# SPDX-License-Identifier: MIT +"""Quality Gate Engine — YAML-driven gate checker. + +Replaces hardcoded GATES dict in ci_quality_gate.py with YAML-configurable +thresholds. Users edit the YAML, not the code. + +默认配置文件: 04-配置文件/quality_gates.yaml +可通过 QUALITY_GATE_CONFIG 环境变量覆盖路径。 +""" + +from __future__ import annotations + +import json +import logging +import os +import sys +from pathlib import Path +from typing import Any + +try: + import defusedxml.ElementTree as ET # type: ignore[import-untyped] +except ImportError: + logger = logging.getLogger(__name__) + logger.warning("defusedxml 未安装,使用标准库 xml.etree.ElementTree(不防 Billion Laughs)。pip install defusedxml") + import xml.etree.ElementTree as ET + +logger = logging.getLogger(__name__) + +DEFAULT_CONFIG = Path(__file__).resolve().parent.parent / "04-配置文件" / "quality_gates.yaml" + + +def _load_yaml_config(path: str | Path) -> dict[str, Any]: + """Load YAML config. 
Requires PyYAML.""" + try: + import yaml + except ImportError: + logger.warning("PyYAML 未安装,使用内置默认门禁阈值。pip install pyyaml") + return _builtin_defaults() + + p = Path(path) + if not p.exists(): + logger.warning("质量门禁配置文件不存在: %s,使用内置默认值", p) + return _builtin_defaults() + + with open(p, encoding="utf-8") as f: + return yaml.safe_load(f) or _builtin_defaults() + + +def _builtin_defaults() -> dict[str, Any]: + return { + "smoke": {"min_pass_rate_pct": 95}, + "regression": {"min_pass_rate_pct": 90, "min_coverage_pct": 80, "max_flaky_pct": 5}, + "performance_ci_quick": {"min_tps": 20, "max_p95_ms": 800, "max_avg_ms": 400, "max_error_pct": 1.0}, + "performance_full": {"min_tps": 100, "max_p95_ms": 500, "max_avg_ms": 200, "max_error_pct": 1.0, "max_baseline_regression_pct": 20}, + "release": {"require_smoke": True, "require_regression": True, "require_perf_full": False, "require_bug_review": True}, + } + + +class QualityGateEngine: + """Load thresholds from YAML, evaluate gates, emit JSON/console results.""" + + def __init__(self, config_path: str | Path | None = None): + path = config_path or os.getenv("QUALITY_GATE_CONFIG", str(DEFAULT_CONFIG)) + self.config = _load_yaml_config(path) + self.results: dict[str, dict[str, Any]] = {} + + # -- JUnit helpers -- + + @staticmethod + def parse_junit(xml_path: str) -> dict[str, Any] | None: + p = Path(xml_path) + if not p.exists(): + return None + try: + root = ET.parse(p).getroot() + except Exception as e: + logger.error("junit-xml 解析失败 %s: %s", xml_path, e) + return None + suites = [root] if root.tag == "testsuite" else root.findall(".//testsuite") + total = failures = errors = skipped = 0 + for s in suites: + total += int(s.attrib.get("tests", 0)) + failures += int(s.attrib.get("failures", 0)) + errors += int(s.attrib.get("errors", 0)) + skipped += int(s.attrib.get("skipped", 0)) + fail_total = failures + errors + passed = total - fail_total - skipped + return { + "total": total, + "passed": passed, + "failed": fail_total, + 
"skipped": skipped, + "pass_rate_pct": round(passed / total * 100, 2) if total > 0 else 0, + } + + # -- Coverage helper -- + + @staticmethod + def parse_coverage(coverage_xml: str) -> float | None: + p = Path(coverage_xml) + if not p.exists(): + return None + try: + root = ET.parse(p).getroot() + return float(root.attrib.get("line-rate", 0)) * 100 + except Exception as e: + logger.error("coverage 解析失败 %s: %s", coverage_xml, e) + return None + + # -- Gate checks -- + + def check_smoke(self, junit_xml: str) -> tuple[bool, str]: + cfg = self.config.get("smoke", {}) + threshold = cfg.get("min_pass_rate_pct", 95) + res = self.parse_junit(junit_xml) + if res is None: + return self._record("smoke", False, f"junit 文件不存在: {junit_xml}") + rate = res["pass_rate_pct"] + ok = rate >= threshold + return self._record("smoke", ok, f"冒烟通过率 {rate}% ≥{threshold}%? {'✅' if ok else '❌'}") + + def check_regression(self, junit_xml: str) -> tuple[bool, str]: + cfg = self.config.get("regression", {}) + threshold = cfg.get("min_pass_rate_pct", 90) + res = self.parse_junit(junit_xml) + if res is None: + return self._record("regression", False, f"junit 文件不存在: {junit_xml}") + rate = res["pass_rate_pct"] + ok = rate >= threshold + return self._record("regression", ok, f"回归通过率 {rate}% ≥{threshold}%? {'✅' if ok else '❌'}") + + def check_coverage(self, coverage_xml: str) -> tuple[bool, str]: + cfg = self.config.get("regression", {}) + threshold = cfg.get("min_coverage_pct", 80) + cov = self.parse_coverage(coverage_xml) + if cov is None: + return self._record("coverage", False, f"coverage.xml 不存在: {coverage_xml}") + ok = cov >= threshold + return self._record("coverage", ok, f"覆盖率 {cov:.1f}% ≥{threshold}%? 
{'✅' if ok else '❌'}") + + def check_performance( + self, jmeter_json: str, mode: str = "ci_quick" + ) -> tuple[bool, str]: + """Parse JMeter result JSON and check against performance gates.""" + key = f"performance_{mode}" + cfg = self.config.get(key, {}) + min_tps = cfg.get("min_tps", 20) + max_p95 = cfg.get("max_p95_ms", 800) + max_avg = cfg.get("max_avg_ms", 400) + max_err = cfg.get("max_error_pct", 1.0) + + p = Path(jmeter_json) + if not p.exists(): + return self._record(key, False, f"JMeter result 不存在: {jmeter_json}") + + try: + with open(p, encoding="utf-8") as f: + data = json.load(f) + except Exception as e: + return self._record(key, False, f"JMeter JSON 解析失败: {e}") + + tps = data.get("tps", 0) + p95 = data.get("p95_ms", 9999) + avg = data.get("avg_ms", 9999) + err = data.get("error_pct", 100) + + checks = [ + tps >= min_tps, + p95 <= max_p95, + avg <= max_avg, + err <= max_err, + ] + ok = all(checks) + detail = ( + f"TPS={tps}(≥{min_tps}) " + f"P95={p95}ms(≤{max_p95}) " + f"AVG={avg}ms(≤{max_avg}) " + f"ERR={err}%(≤{max_err})" + ) + return self._record(key, ok, f"性能({mode}) {detail}? 
{'✅' if ok else '❌'}") + + def check_release(self) -> tuple[bool, str]: + cfg = self.config.get("release", {}) + required = [ + ("smoke", cfg.get("require_smoke", True)), + ("regression", cfg.get("require_regression", True)), + ("performance_full", cfg.get("require_perf_full", False)), + ] + missing = [ + name for name, req in required if req and name not in self.results + ] + if missing: + msg = f"Release 门禁缺少: {', '.join(missing)}" + return self._record("release", False, msg) + failed = [ + name for name, _ in required + if name in self.results and not self.results[name].get("pass", True) + ] + if failed: + msg = f"Release 门禁未通过: {', '.join(failed)}" + return self._record("release", False, msg) + return self._record("release", True, "Release 门禁全部通过 ✅") + + # -- Internal -- + + def _record(self, name: str, ok: bool, message: str) -> tuple[bool, str]: + self.results[name] = {"pass": ok, "message": message} + return ok, message + + # -- Output -- + + @property + def all_pass(self) -> bool: + if not self.results: + return False + return all(v.get("pass", False) for v in self.results.values()) + + def summary_json(self, path: str | None = None) -> dict[str, Any]: + data = {"overall_pass": self.all_pass, "details": self.results} + if path: + Path(path).parent.mkdir(parents=True, exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + return data + + def print_summary(self) -> None: + for name, detail in self.results.items(): + flag = "✅" if detail["pass"] else "❌" + print(f"{flag} [{name}] {detail['message']}") + print(f"\n{'✅ 全部门禁通过' if self.all_pass else '❌ 质量门禁未通过'}") + + +def main() -> None: + import argparse + + logging.basicConfig(level=logging.INFO) + + parser = argparse.ArgumentParser(description="Quality Gate Engine (YAML-driven)") + parser.add_argument("--config", help="YAML 配置文件路径 (默认: 04-配置文件/quality_gates.yaml)") + parser.add_argument("--smoke-xml", help="冒烟 junit xml 路径") + 
parser.add_argument("--regression-xml", help="回归 junit xml 路径") + parser.add_argument("--coverage-xml", help="coverage.xml 路径") + parser.add_argument("--jmeter-json", help="JMeter result JSON 路径") + parser.add_argument("--perf-mode", choices=["ci_quick", "full"], default="ci_quick") + parser.add_argument("--release", action="store_true", help="执行 release 门禁检查") + parser.add_argument("--output-json", help="结果写入 JSON") + args = parser.parse_args() + + engine = QualityGateEngine(args.config) + + if args.smoke_xml: + engine.check_smoke(args.smoke_xml) + if args.regression_xml: + engine.check_regression(args.regression_xml) + if args.coverage_xml: + engine.check_coverage(args.coverage_xml) + if args.jmeter_json: + engine.check_performance(args.jmeter_json, args.perf_mode) + if args.release: + engine.check_release() + + engine.print_summary() + + if args.output_json: + engine.summary_json(args.output_json) + + if not engine.all_pass: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/webhook_bug_manager.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/webhook_bug_manager.py" new file mode 100644 index 0000000..00e249f --- /dev/null +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/webhook_bug_manager.py" @@ -0,0 +1,116 @@ +# SPDX-License-Identifier: MIT +"""Webhook Bug 管理客户端 — BugTrackerBase 适配器。 + +通用 webhook 推送:POST JSON 到配置的 URL。 +适用场景:企业微信/飞书/钉钉机器人、自建系统回调、Slack incoming webhook。 + +环境变量: + WEBHOOK_BUG_URL — webhook 接收端点 + WEBHOOK_AUTH_HEADER — 可选认证头 (如 "Bearer xxx" 或 "Key yyy") +""" + +from __future__ import annotations + +import json +import logging +import os +import uuid +from typing import Any + +import requests +from dotenv import load_dotenv + +from bug_tracker_base import BugTrackerBase, TRACKER_REGISTRY + +load_dotenv() +logger = logging.getLogger(__name__) + + +class WebhookBugManager(BugTrackerBase): + def __init__( + self, + url: str | None = None, + auth_header: str | 
None = None, + ): + self.url = url or os.getenv("WEBHOOK_BUG_URL", "") + self.auth_header = auth_header or os.getenv("WEBHOOK_AUTH_HEADER", "") + if not self.url: + raise ValueError("WEBHOOK_BUG_URL 未配置") + + def _post(self, payload: dict[str, Any]) -> dict[str, Any]: + headers = {"Content-Type": "application/json"} + if self.auth_header: + # Parse "Key: Value" or pass through as-is + if ": " in self.auth_header: + key, val = self.auth_header.split(": ", 1) + headers[key] = val + else: + headers["Authorization"] = self.auth_header + resp = requests.post(self.url, json=payload, headers=headers, timeout=30) + resp.raise_for_status() + if resp.text: + try: + return resp.json() + except json.JSONDecodeError: + return {"raw": resp.text} + return {} + + def submit_bug( + self, + title: str, + description: str, + severity: int, + attachments: list[str] | None = None, + reproduce_steps: str = "", + ) -> str: + bug_id = str(uuid.uuid4())[:12] + payload: dict[str, Any] = { + "event": "bug.submit", + "bug_id": bug_id, + "title": title, + "description": description, + "severity": severity, + "reproduce_steps": reproduce_steps, + } + if attachments: + payload["attachments"] = attachments + self._post(payload) + logger.info("Webhook Bug 已推送: %s — %s", bug_id, title) + return bug_id + + def get_status(self, bug_id: str) -> dict[str, Any]: + logger.warning( + "Webhook 适配器为单向推送,get_status 返回占位。BugID: %s", bug_id + ) + return { + "status": "unknown", + "assignee": "", + "severity": 3, + "last_updated": "", + } + + def add_comment( + self, bug_id: str, comment: str, attachments: list[str] | None = None + ) -> None: + self._post({ + "event": "bug.comment", + "bug_id": bug_id, + "comment": comment, + }) + logger.info("Webhook Bug %s: 已推送评论", bug_id) + + def link_testcase(self, bug_id: str, testcase_id: str) -> None: + self._post({ + "event": "bug.link_testcase", + "bug_id": bug_id, + "testcase_id": testcase_id, + }) + + def query_open_bugs( + self, filters: dict[str, Any] | None = None 
+ ) -> list[dict[str, Any]]: + logger.warning("Webhook 适配器为单向推送,query_open_bugs 返回空列表") + return [] + + +TRACKER_REGISTRY["webhook"] = WebhookBugManager diff --git a/CHANGELOG.md b/CHANGELOG.md index a4e441c..bb0e6f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,125 @@ _后续累积变更入此节;切版本时移到下方版本节。_ --- +## [v1.37.0] - 2026-05-18 + +### Added +- Bug Tracker 5 适配器: `jira_bug_manager.py` / `github_bug_manager.py` / `linear_bug_manager.py` / `webhook_bug_manager.py` +- Quality Gate Engine: `quality_gate_engine.py` + `04-配置文件/quality_gates.yaml` (YAML 驱动门禁) +- Layered requirements: `requirements/{base,mobile,desktop,visual,system,ai,perf}.txt` (按需安装引擎) +- CI `compileall runtime/` syntax check + +### Fixed +- H16: Expert count clarified (9 含 test-lead vs 8 被协调) +- H18: Skills README completed (13→32 business + 3 meta skills) +- M12: `run_file` BackgroundTasks unified (was raw threading.Thread) +- M14: RACI matrix expanded to 18 columns (pentest + automotive) +- M15: `requires_layer` frontmatter field documented in CONTRIBUTING.md +- M19: automotive-test checker reference fixed (`Nonexistent mcp-compliance-checker` → `compliance/engine.py`) + +### Changed +- Utils count: 67 → 73 (6 new modules) +- 3 charter items: Bug多适配 ✅ / 按需安装 ✅ / 门禁YAML ✅ + +--- + +## [v1.36.0] - 2026-05-18 + +### Added +- chaos_helper_v2.py: blast radius + steady-state hypothesis + 6 fault types +- state_machine_tester_v2.py: N-switch coverage + executable guards + weighted walk +- db_test_helper_v2.py: MySQL/SQLite + isolation levels + FK/constraint testing + connection pooling +- bdd_runner_v2.py: Gherkin parser + pytest-bdd integration + coverage scanner +- carbon_scheduler.py: electricityMap + CodeCarbon integration + green budget tracking +- canary_config.py: Argo Rollouts CRD generation + Mann-Whitney analysis + error budget burn rate + +### Security +- runtime/backends/ssh.py: known_hosts=None (was (), disabling host verification) +- runtime/backends/docker.py: shlex.quote(cmd) before 
shell execution +- runtime/api/main.py: constant-time bearer token comparison (secrets.compare_digest) +- runtime/orchestrator/adapters/perf_orchestrator.py: fix SyntaxError (nonlocal outside function) + UnboundLocalError + missing import os +- runtime/api/main.py: fix ResultStore TypeError (dict [] assignment → .put()) +- runtime/orchestrator/hooks.py: hook failures now logged at debug level (was silent pass) +- runtime/observability/prometheus_metrics.py: list→deque(maxlen=1000) prevents unbounded growth +- runtime/api/result_store.py: threading.Lock→RLock prevents deadlock in __contains__ +- runtime/router/llm_client.py + agents/base.py: fix strip("`") corrupting backtick content +- runtime/api/endpoints/stream.py: fix race condition (setdefault) + store ensure_future task refs + +### Fixed +- runtime/orchestrator/release_readiness.py: fix --from-summary AttributeError (Path wrapping) +- runtime/orchestrator/direct.py: guard pool.shutdown() against uninitialized pool +- runtime/tests/test_cli_commands.py: remove unregistered search/list/plan commands +- 03-技能定义/darwin-skill/scripts/screenshot.mjs: replace hardcoded /Users/alchain/ path +- 03-技能定义/nuwa-skill/references/skill-template.md: remove upstream author X/Twitter branding + +### Changed +- VERSION: 1.32.5 → 1.36.0 +- runtime/__init__.py: __version__ "1.32.5" → "1.36.0" +- runtime/pyproject.toml: version "1.32.0" → "1.36.0" +- desktop/package.json + mobile/package.json: "1.32.0" → "1.36.0" +- Global: "49 utils" → "67 utils" across all documentation (~25 locations) + +--- + +## [v1.35.0] - 2026-05-18 + +### Added +- flaky_guard.py: pytest plugin + Chi-squared analysis + auto-quarantine + failure clustering +- api_security_scanner_v2.py: complete OWASP API Top 10 2023 + JWT attack matrix (key confusion, kid injection, jku/x5u, expiry bypass) +- data_factory_v2.py: 8 entity types + FK relationships + salted PII + CSV/SQL export +- perf_orchestrator.py: unified performance orchestration + progressive load 
(10%→50%→100%→120%) +- event_test_harness.py: Kafka/RabbitMQ/SQS + schema validation +- visual_regression.py: multi-engine (pixelmatch + SSIM + Butteraugli) +- flaky_analyzer.py: LLM root cause analysis (Google Auto-Diagnose style) +- test_prioritizer.py: ML-based test ordering (git-diff→Bag-of-Words) +- differential_tester.py: cross-implementation comparison + Mann-Whitney U significance +- eu_ai_act.py: EU AI Act Annex III + Art.9-15 audit, compliance deadline 2026-08-02 +- supply_chain.py: CycloneDX SBOM + SLSA verification + Sigstore signing + +--- + +## [v1.34.0] - 2026-05-18 + +### Added +- Settings 14-field + max_tokens configurable +- IDE integration (.vscode/.editorconfig/devcontainer) +- Docker hardening +- script_bridge.py: 5 standalone scripts wired into orchestrator +- a11y_scanner_v2.py: WCAG 2.2 with 78 criteria +- suite_minimizer_v2.py: similarity-clustering based test minimization +- Prometheus /metrics endpoint +- Request-ID correlation middleware +- WebSocket streaming for real-time run progress +- ResultStore with LRU eviction +- property_tester.py: Hypothesis PBT with 6 templates +- contract_test_generator.py: OpenAPI→Pact generation +- schema_fuzzer.py: JSON Schema-based fuzzing +- Compliance engine +- Desktop IPC extension (14 methods) +- Data lifecycle manager +- DORA 2025 metrics tracker + +--- + +## [v1.33.0] - 2026-05-17 + +### Added +- Complete MASTER_PLAN execution (38/38 items across 8 phases) + +--- + +## [v1.32.5] - 2026-05-17 + +### Security +- utils: shell injection hardening, hardcoded credential removal, API auth, silent failure fixes +- CI: pin ludeeus/action-shellcheck@2.0.0 +- generate_report.py: split 143-line function → 6 helpers +- mobile_driver.py: split 107-line function → helper extraction +- _stub_response: 77-line if/elif chain → dispatch table +- fuzzer.py: PAYLOAD_LIBRARY values hoist to module-level ALL_PAYLOADS + +--- + ## [v1.32.4] - 2026-05-17 ### Changed @@ -408,7 +527,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ### 
Added(V1.1.0 · 运行时层) -- **新增 `runtime/` 运行时层**:把 14 专家 + 13 Skill + 49 脚本从"文档+工具箱"升级为"可执行运行时"。已有定义/Skill/脚本**保持不动**(宪章铁律),`runtime/` 仅作调度层。 +- **新增 `runtime/` 运行时层**:把 14 专家 + 13 Skill + 67 脚本从"文档+工具箱"升级为"可执行运行时"。已有定义/Skill/脚本**保持不动**(宪章铁律),`runtime/` 仅作调度层。 - `runtime/router/`:AI 路由(LiteLLM 多厂商:Claude/OpenAI/Gemini/Qwen/DeepSeek/Ollama)。被测物 → 专家+Skill DAG。含 stub provider 供 CI 离线测,准确率 5/5 类型(web/api/mobile/desktop/ai-model) - `runtime/registry/`:扫 `02-专家定义/*.md` + `03-技能定义/*.md` frontmatter 生成统一目录(14 expert + 13 skill,实测通过) - `runtime/orchestrator/`:**双轨**——Prefect 2.x flow(全功能,带 UI/重试/状态机)+ Direct 执行器(无 Prefect 也能跑,ThreadPoolExecutor 并发,降级方案) @@ -440,7 +559,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_ ### Changed(数字漂移修复 + URL 统一 Batch 2) -- **顶层文档数字一致性**:`8 位专家 / 9 agent / 8 skill / 12 utils` 等过时数字全栈修正为 `14 agent / 13 skill / 49 utils`(核心 8 专家 + 平台扩展 5 专家 + test-lead 协调者)。涉及:`README_DETAIL.md` / `01-快速开始/使用手册.md` / `02-专家定义/01-测试主管.md` / `03-技能定义/test-coordinator.md` / `install.sh`。 +- **顶层文档数字一致性**:`8 位专家 / 9 agent / 8 skill / 12 utils` 等过时数字全栈修正为 `14 agent / 13 skill / 67 utils`(核心 8 专家 + 平台扩展 5 专家 + test-lead 协调者)。涉及:`README_DETAIL.md` / `01-快速开始/使用手册.md` / `02-专家定义/01-测试主管.md` / `03-技能定义/test-coordinator.md` / `install.sh`。 - **GitHub 仓库 URL 统一**:所有引用 `YOUR-USER/Test-Agent工作流搭建` 的位置统一为 `Wool-xing/Test-Agent`(权威英文仓库名;中文 `Test-Agent工作流搭建` 仅作目录别名)。fork 用户可用 `TEST_AGENT_REPO_URL` 环境变量覆盖。涉及:`01-快速开始/部署说明.md` / `01-快速开始/使用手册.md` / `README_DETAIL.md`。 - **覆盖率口径统一为 ~95%**:原 `~99%` (README/README_DETAIL) vs `约 90%` (00-项目导航) 不一致,统一为 `~95%`,剩 5% 为高度专业合规领域(航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508)。 @@ -454,21 +573,21 @@ _后续累积变更入此节;切版本时移到下方版本节。_ - `README.md` 删除三视角矩阵段(迁移至 FULL_GUIDE.md,避免双份维护) - `README.md` 行数从 240 降至 168 行 - **W3 安全增强**: - - `49 个 utils .py` 文件头加 `# SPDX-License-Identifier: MIT`(合规标识) + - `67 个 utils .py` 文件头加 `# SPDX-License-Identifier: MIT`(合规标识) - `.pre-commit-config.yaml` 加 gitleaks hook(凭据扫描) - `.gitignore` 补漏:`.ruff_cache/` / `*.jtl` / `*.pem` / `*.key` / `*.crt` / 
`*.p12` / `*.pfx` / `*.jks` / `id_rsa` / `id_ed25519` / `coverage.xml` / `pip-wheel-metadata/` - **W3 收尾 · 方法论沉淀(F'+J+K)**: - `CONTRIBUTING.md` 末尾追加:**同步铁律段**(联动改动清单速查 + 自动化保障)+ **RACI 协作矩阵浓缩版**(14 专家 × 35 测试维度,含责任边界冲突解决与质量门禁联动) - `FULL_GUIDE.md` 末尾追加:**测试架构合理性深度章节**(6 子节:金字塔 2024 现代版 / Shift-Left 7 层 / Shift-Right 9 层 / 可观测三柱 + 测试可视化 / 五层质量门禁 + Flaky vs Reruns 哲学 / 调整路径 Phase 2-4 落地点) - 新建 `examples/web-demo/`:8 文件最小可跑 Web 测试示例(pytest + Playwright + Page Object,演示 `https://playwright.dev`,5 分钟跑通) - - `FULL_GUIDE.md:395` 漏修补救:`utils/*.py(12 个)` → `49 个,含 __init__.py` + - `FULL_GUIDE.md:395` 漏修补救:`utils/*.py(12 个)` → `67 个,含 __init__.py` ### Notes W1+W2+W3 合并提交:上架前必修安全 + 数字漂移修复 + URL 统一 + 信息架构重塑(FULL_GUIDE/INDEX/SPDX/gitleaks)。 后续 W4 博客 + Show HN 准备 待执行。 -> 注:本仓库 GitHub Actions CI 已配 `permissions: contents: read` 最小权限(F3);CodeQL 显式声明 per-job 权限。pre-commit 已含 `detect-private-key` + .env 防护 + 14/13/49 文件统计。 +> 注:本仓库 GitHub Actions CI 已配 `permissions: contents: read` 最小权限(F3);CodeQL 显式声明 per-job 权限。pre-commit 已含 `detect-private-key` + .env 防护 + 14/13/67 文件统计。 --- @@ -480,7 +599,7 @@ W1+W2+W3 合并提交:上架前必修安全 + 数字漂移修复 + URL 统一 - 14 测试专家 Agent(核心 9 + 平台扩展 5) - 13 测试技能 Skill(通用 8 + 平台 5) -- 49 utils Python 工具模块 +- 67 utils Python 工具模块 - GitHub Actions + Jenkins 双 CICD - Dependabot 周扫描 + pip-audit/safety CVE 拦截 - 多格式 PRD 加载(md/pdf/docx/xlsx/zip/png/url/html/pptx) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 176760f..26b2cba 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,9 +6,9 @@ ## 添加新 Agent -1. 选定分类(核心通用 9 / 平台扩展 5) +1. 选定分类(核心通用 9 / 平台扩展 5 / 垂直领域 2) 2. 文件命名 `15-XXX.md`(按编号递增) -3. 顶部 YAML frontmatter(必含 `name` / `description` / `tools`) +3. 顶部 YAML frontmatter(必含 `name` / `description` / `tools`;可选 `requires_layer: [base, ]` 标注依赖层,值见 `docs/charter/05-install-deploy.md` 六层定义) 4. 编写:职责 / 工具栈 / Page Object 或调用模板 / 协作输出 5. **同步**: - `02-专家定义/README.md` 加一行 @@ -24,7 +24,7 @@ 1. 选定分类(通用 8 / 平台专项 5) 2. 文件命名 `-.md`(如 `chaos-test.md`) -3. 顶部 YAML frontmatter +3. 
顶部 YAML frontmatter(可选 `requires_layer: [base, ]` 标注依赖层,值见 `05-install-deploy.md` 六层定义) 4. 必含章节: - 🔔 开测前准备清单(平台 skill 必有) - 触发方式 @@ -110,9 +110,9 @@ perf(jmeter): 减少不必要心跳 ## 自检脚本(一键验证项目完整性) ```bash -ls 02-专家定义/[0-9]*.md | wc -l # 16(或 +N) +ls 02-专家定义/[0-9]*.md | wc -l # 18(或 +N) ls 03-技能定义/*.md | grep -v README | wc -l # 32(或 +N,不含 3 个元 skill 子目录) -ls 05-代码示例/*.py | wc -l # 49(或 +N,含 __init__.py) +ls 05-代码示例/*.py | wc -l # 67(或 +N,含 __init__.py) grep -c "^ [a-z_]+:" 04-配置文件/pytest.ini # markers 数 python -c "from utils.api_retry_util import call_with_retry; print('OK')" pytest --collect-only @@ -131,14 +131,14 @@ pytest --collect-only | 新增/删除 Agent | `02-专家定义/README.md` + `00-项目导航.md` + `install.sh` agents 数组 + `01-快速开始/部署说明.md` 拷贝清单 + `01-测试主管.md` 路由表 + `prd_loader.PLATFORM_KEYWORDS` | | 新增/删除 Skill | `03-技能定义/README.md` + `00-项目导航.md` + `install.sh` skills 数组 + `01-快速开始/使用手册.md` skill 详解 + `01-测试主管.md` 快速命令清单 | | 新增/删除 utils | `05-代码示例/README.md` + `00-项目导航.md` + `requirements.txt` + `.env.example` + `conftest.py::pytest_configure` + `pytest.ini` markers + `install.sh` utils 数组 | -| 数字变化(16/32+3 子目录/49) | grep 全项目 + 同步顶层 README/FULL_GUIDE/00-项目导航/ROADMAP/使用手册/部署说明/install.sh + ci.yml `file-count` job 校验 | +| 数字变化(18/32+3 子目录/49) | grep 全项目 + 同步顶层 README/FULL_GUIDE/00-项目导航/ROADMAP/使用手册/部署说明/install.sh + ci.yml `file-count` job 校验 | | URL/repo 名变化 | grep `Wool-xing/Test-Agent` 全替换 + `install.sh::REPO_URL` + `dependabot.yml` | | 门禁阈值变化 | `utils/ci_quality_gate.py::GATES` + `utils/jmeter_result_parser.py::DEFAULT_GATES_*` + `02-专家定义/01-测试主管.md::QUALITY_GATES` + 各 skill 门禁段 | ### 自动化保障 -- `pre-commit`:16/32/49 文件统计 + .env 防护 + gitleaks 凭据扫描 + ruff -- `.github/workflows/ci.yml`:16/32/49 自校 + Markdown 链接有效性 + utils 导入 +- `pre-commit`:18/32/67 文件统计 + .env 防护 + gitleaks 凭据扫描 + ruff +- `.github/workflows/ci.yml`:18/32/67 自校 + Markdown 链接有效性 + utils 导入 - `.github/workflows/codeql.yml`:python + GitHub Actions 安全扫描 ### 提交前自检 @@ -162,7 +162,7 @@ pytest --collect-only - **I** = 
Informed(被通报) - 空 = 不参与 -### 16 专家代号 +### 18 专家代号 | 代号 | 专家 | 类别 | |------|------|------| @@ -180,47 +180,49 @@ pytest --collect-only | VT | visual-tester | 平台扩展 | | ST | system-tester | 平台扩展 | | AT | ai-tester | 平台扩展 | +| PT | pentest-tester | 垂直领域 | +| AMT | automotive-tester | 垂直领域 | ### RACI 主表(测试维度 × 专家) -| 测试维度 | TL | RA | TD | EM | DP | AE | TE | BM | RG | MT | DT | VT | ST | AT | -|---------|----|----|----|----|----|----|----|----|----|----|----|----|----|----| -| 需求分析 | A | R | C | I | I | I | I | I | I | C | C | C | C | C | -| 用例设计-功能 | A | C | R | I | C | I | I | I | I | C | C | C | C | C | -| 用例设计-非功能 | A | C | R | I | C | C | C | C | I | | | | | | -| 环境准备 | A | I | I | R | C | C | C | I | I | C | C | | C | | -| 数据准备 | A | I | C | C | R | C | C | I | I | C | | | C | C | -| Web 自动化 | A | I | C | I | C | R | C | I | I | | | | | | -| API 自动化 | A | I | C | I | C | R | C | I | I | | | | C | | -| 性能(JMeter) | A | C | C | C | C | R | R | I | C | | | | | | -| 移动端 | A | C | C | C | C | C | C | I | I | R | | | | | -| 桌面端 | A | C | C | C | C | C | C | I | I | | R | | | | -| 视觉/游戏 | A | C | C | C | C | C | C | I | I | | | R | | | -| 系统/IoT/音视频 | A | C | C | C | C | C | C | I | I | | | | R | | -| AI/LLM | A | C | C | C | C | C | C | I | I | | | | | R | -| 安全(SAST/DAST/Fuzz) | A | I | R | C | I | C | C | R | I | | | | | C | -| 兼容矩阵 | A | I | R | C | I | R | C | I | I | C | C | | | | -| 弱网 | A | I | C | C | I | C | R | I | I | C | | | | | -| 稳定 Soak | A | I | C | C | I | C | R | I | I | C | | | | | -| 可靠性(重试/降级) | A | I | C | C | I | R | C | I | I | | | | | | -| 混沌 | A | I | C | C | I | C | R | I | I | | | | | | -| 灾备 Failover | A | I | C | R | I | C | R | I | I | | | | C | | -| UX 度量 | A | I | R | C | I | R | C | I | C | | | | | | -| 易用性(Nielsen) | A | I | R | C | I | C | I | I | I | | | | | | -| 探索性 SBTM | A | I | R | C | C | C | C | C | I | | | | | | -| Web Vitals | A | I | C | I | I | R | C | I | I | | | | | | -| A11y 无障碍 | A | I | R | I | I | R | C | I | I | | | 
| | | -| i18n / l10n | A | I | R | I | I | R | C | I | I | | | | | | -| 数据库测试 | A | I | C | C | R | R | C | I | I | | | | | | -| 契约测试 | A | C | R | I | C | R | C | I | I | | | | | | -| 视觉回归 | A | I | C | I | I | C | C | I | I | | | R | | | -| AI 对抗/越狱 | A | C | C | I | I | C | C | C | I | | | | | R | -| 变异测试 | A | I | R | I | I | C | C | I | I | | | | | | -| DORA / 度量 | A | I | C | I | I | C | R | R | R | | | | | | -| Bug 提交 BugTracker | A | I | I | I | I | I | C | R | C | I | I | I | I | I | -| 报告生成 | A | I | I | I | I | I | C | C | R | I | I | I | I | I | -| 多端通知 | A | I | I | I | I | I | I | I | R | I | I | I | I | I | -| **上线决策** | **R/A** | C | C | I | I | C | C | C | C | I | I | I | I | I | +| 测试维度 | TL | RA | TD | EM | DP | AE | TE | BM | RG | MT | DT | VT | ST | AT | PT | AMT | +|---------|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|-----| +| 需求分析 | A | R | C | I | I | I | I | I | I | C | C | C | C | C | C | | +| 用例设计-功能 | A | C | R | I | C | I | I | I | I | C | C | C | C | C | C | | +| 用例设计-非功能 | A | C | R | I | C | C | C | C | I | | | | | | C | | +| 环境准备 | A | I | I | R | C | C | C | I | I | C | C | | C | | C | | +| 数据准备 | A | I | C | C | R | C | C | I | I | C | | | C | C | | | +| Web 自动化 | A | I | C | I | C | R | C | I | I | | | | | | | | +| API 自动化 | A | I | C | I | C | R | C | I | I | | | | C | | | | +| 性能(JMeter) | A | C | C | C | C | R | R | I | C | | | | | | | | +| 移动端 | A | C | C | C | C | C | C | I | I | R | | | | | | | +| 桌面端 | A | C | C | C | C | C | C | I | I | | R | | | | | | +| 视觉/游戏 | A | C | C | C | C | C | C | I | I | | | R | | | | | +| 系统/IoT/音视频 | A | C | C | C | C | C | C | I | I | | | | R | | C | C | +| AI/LLM | A | C | C | C | C | C | C | I | I | | | | | R | C | | +| 安全(SAST/DAST/Fuzz) | A | I | R | C | I | C | C | R | I | | | | | C | R | | +| 兼容矩阵 | A | I | R | C | I | R | C | I | I | C | C | | | | | C | +| 弱网 | A | I | C | C | I | C | R | I | I | C | | | | | | | +| 稳定 Soak | A | I | C | C | I | C | R | 
I | I | C | | | | | | | +| 可靠性(重试/降级) | A | I | C | C | I | R | C | I | I | | | | | | | C | +| 混沌 | A | I | C | C | I | C | R | I | I | | | | | | C | | +| 灾备 Failover | A | I | C | R | I | C | R | I | I | | | | C | | C | C | +| UX 度量 | A | I | R | C | I | R | C | I | C | | | | | | | | +| 易用性(Nielsen) | A | I | R | C | I | C | I | I | I | | | | | | | | +| 探索性 SBTM | A | I | R | C | C | C | C | C | I | | | | | | | | +| Web Vitals | A | I | C | I | I | R | C | I | I | | | | | | | | +| A11y 无障碍 | A | I | R | I | I | R | C | I | I | | | | | | | | +| i18n / l10n | A | I | R | I | I | R | C | I | I | | | | | | | | +| 数据库测试 | A | I | C | C | R | R | C | I | I | | | | | | | | +| 契约测试 | A | C | R | I | C | R | C | I | I | | | | | | | | +| 视觉回归 | A | I | C | I | I | C | C | I | I | | | R | | | | | +| AI 对抗/越狱 | A | C | C | I | I | C | C | C | I | | | | | R | C | | +| 变异测试 | A | I | R | I | I | C | C | I | I | | | | | | | | +| DORA / 度量 | A | I | C | I | I | C | R | R | R | | | | | | | | +| Bug 提交 BugTracker | A | I | I | I | I | I | C | R | C | I | I | I | I | I | I | I | +| 报告生成 | A | I | I | I | I | I | C | C | R | I | I | I | I | I | I | I | +| 多端通知 | A | I | I | I | I | I | I | I | R | I | I | I | I | I | I | I | +| **上线决策** | **R/A** | C | C | I | I | C | C | C | C | I | I | I | I | I | I | I | ### 责任边界冲突解决 diff --git a/FULL_GUIDE.md b/FULL_GUIDE.md index 3a7f53e..98e40c7 100644 --- a/FULL_GUIDE.md +++ b/FULL_GUIDE.md @@ -5,9 +5,9 @@ > **维护原则**:决策入档、开放问题入档、不打脸的承诺才写。重大决策须更新「📋 开放问题」与「🗺️ 项目当前状态」两节。 **项目名称**:`Test-Agent`(内部代号 `test-agent-team`) -**当前阶段**:Phase 2 前期(V1.32.5 · 16 expert + 23/32 skill production + 0 rollout + 2 vision;V1.x rollout 收尾) -**版本**:V1.32.5(详见 [VERSION](VERSION) + [CHANGELOG.md](CHANGELOG.md)) -**更新日期**:2026-05-17 +**当前阶段**:Phase 2 前期(V1.36.0 · 16 expert + 30/32 skill active (23 production + 7 script) + 0 rollout + 2 vision;V1.x rollout 收尾) +**版本**:V1.36.0(详见 [VERSION](VERSION) + [CHANGELOG.md](CHANGELOG.md)) +**更新日期**:2026-05-18 **模型**:Claude 4.x 
系列(Opus 4.7 / Sonnet 4.6 / Haiku 4.5,由 Claude Code 默认管理) --- @@ -26,7 +26,7 @@ | 04 | [skills-bugtracker](docs/charter/04-skills-bugtracker.md) | Skills 自进化机制 + Bug Tracker 多适配器 | | 05 | [install-deploy](docs/charter/05-install-deploy.md) | 按需安装 + 架构图 + 快速开始 + 工作流 + 技术栈 + 闭环 + 升级 + 协作 + 跨 AI | | 06 | [test-architecture](docs/charter/06-test-architecture.md) | 测试架构深度 + 关键反问 + 开放问题 + 术语表 | -| 07 | [runtime-license](docs/charter/07-runtime-license.md) | V1.32.5 运行时层 + LICENSE / CHANGELOG / 项目当前状态 | +| 07 | [runtime-license](docs/charter/07-runtime-license.md) | V1.36.0 运行时层 + LICENSE / CHANGELOG / 项目当前状态 | ## 跨文件链接迁移指引 diff --git a/README.md b/README.md index 67582cb..7cfcf48 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Test-Agent turns any software, EXE, APK, Docker image, or API into a **fully tes - **16 expert agents** (11 production + 5 script — V1.x rollout 收尾,见 [ROADMAP.md](ROADMAP.md)) — functional · security · mobile · desktop · AI model · automotive · pentest … - **30 active skills** (23 production + 7 script) **+ 2 vision-only** (reference, not executable) **+ 3 meta-skills** — TDD · E2E · regression · pentest · car-CAN-bus · eval-harness · … -- **49 production utils** — pytest · Playwright · JMeter · Appium · Burp · Allure · OpenCV · … +- **73 production utils** — pytest · Playwright · JMeter · Appium · Burp · Allure · OpenCV · … - **Multi-LLM (any provider, plug-and-play)** — 6 built-in (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama) + **OpenAI-compatible fallback channel** for any other provider (Zhipu / Doubao / Kimi / Baichuan / Xunfei / …) via 3 env vars, zero code change. 
Cookbook: [`04-配置文件/llm-providers.md`](04-配置文件/llm-providers.md) - **BugTracker** — 1 active adapter (Zentao); 5 planned (Jira · GitHub · GitLab · Linear · Webhook, see roadmap) - **6 notify channels** — WeChat Work · Lark/Feishu · DingTalk · Slack · Email · MS Teams @@ -104,7 +104,7 @@ Test-Agent/ ├── 02-专家定义/ ← 16 expert agents (11 production + 5 script, V1.x rollout 收尾) ├── 03-技能定义/ ← 32 business skills (23 production + 7 script + 0 rollout + 2 vision) + 3 meta-skills ├── 04-配置文件/ ← conftest / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ← 49 production utils +├── 05-代码示例/ ← 73 production utils ├── 06-CICD集成/ ← GitHub Actions + Jenkins ├── runtime/ ← V1.x runtime layer (router / orchestrator / MCP / web / scheduler / subagent / learning_loop / backends / gateway / tutor / essence_watcher / marketplace) ├── docs/charter/ ← Vision charter (7 split files: vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license) diff --git a/README.zh-CN.md b/README.zh-CN.md index 143cba8..21628bd 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -98,7 +98,7 @@ Test-Agent/ ├── 02-专家定义/ ← 16 个专家 Agent (11 production + 5 script + 0 rollout) ├── 03-技能定义/ ← 32 个业务 Skill (23 production + 7 script + 0 rollout + 2 vision) + 3 个元 Skill ├── 04-配置文件/ ← conftest / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ← 49 个生产工具 +├── 05-代码示例/ ← 67 个生产工具 ├── 06-CICD集成/ ← GitHub Actions + Jenkins ├── runtime/ ← V1.x 运行时(router/orchestrator/MCP/web/scheduler/subagent/learning_loop/backends/gateway/tutor/essence_watcher/marketplace) ├── docs/charter/ ← 愿景宪章(7 子文件: vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license) diff --git a/ROADMAP.md b/ROADMAP.md index 361ee8c..469f08d 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,7 +1,7 @@ # Test-Agent V1.x ROADMAP > 项目终态目标:每个 expert / skill 真 LLM-driven / script-backed 实装,**绝不输出 mock 数据**。 -> 当前状态:V1.32.5 
(**expert rollout 收尾 + skill rollout 完成(16/16)**) +> 当前状态:V1.36.0 (**expert rollout 收尾 + skill rollout 完成(16/16)**) > - **expert 16/16 active**(11 production + 5 script);0 rollout。 > - **skill 30/32 active**(23 production + 7 script);0 rollout;2 暂为 V2 vision 方法论参考。 > - 3 meta-skill(nuwa-skill / darwin-skill / karpathy-guidelines)独立,工具属性,不在 32 业务 skill 数内。 @@ -129,6 +129,14 @@ --- +## V1.34-V1.36 能力扩展 + +- **V1.34**: script_bridge.py 桥接 5 独立脚本进 orchestrator pipeline +- **V1.35**: 11 深度审计模块 (flaky guard / API security v2 / data factory v2 / perf orchestrator / event harness / visual regression / ML prioritizer / differential tester / EU AI Act / supply chain) +- **V1.36**: 6 延期模块 (chaos v2 / state machine v2 / DB test v2 / BDD v2 / carbon scheduler / canary config) + CVE-2025-71176 fix + 深度审查65发现全修 + +--- + ## V2.x vision — 2 skill(暂留方法论参考形态) | Skill | 当前形态 | V2 路线 | @@ -193,4 +201,14 @@ V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地: | V1.29.0 | 2026-05-16 | **skill rollout #8** — pentest-exploit + pentest-report 双 skill LLM-driven 生产落地 (exploit: 沙箱内验证 PoC + 不可破坏性约束 · report: working PoC 嵌入 + CWE/CVSS/PoC/修复 4 维) | 16 expert + 18/32 production | | V1.30.0 | 2026-05-16 | **skill rollout #9** — automotive-test + automotive-can-bus-test 双 skill LLM-driven 生产落地 (主编排: 10 阶段 HARA→报告 · CAN: CAN/CAN-FD/SOME-IP 协议一致性 + dbc 解析) | 16 expert + 20/32 production | | V1.31.0 | 2026-05-16 | **skill rollout #10 (收尾)** — automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test 3 skill LLM-driven 生产落地 (ADAS: AEB/ACC/LKA + CARLA 仿真 · OTA: 6 校验 + UN R156/GB 44496 合规 · HIL: MIL/SIL/HIL 三环 + dSPACE). 
**V1.x rollout 完成 — 23/32 production + 7 script + 0 rollout + 2 vision.** | 16 expert + 23/32 production (0 rollout 待) | +| V1.32.0 | 2026-05-17 | 深审32发现全修 + 版本号全同步 + 私源泄漏清洗 | 16 expert + 23/32 production | +| V1.32.1 | 2026-05-17 | CONTRIBUTING skill count 33→32 fix + 版本号同步 | 16 expert + 23/32 production | +| V1.32.2 | 2026-05-17 | Security hardening batch: CWE-78 fix + credential removal + CORS + WebSocket leak + XML escape | 16 expert + 23/32 production | +| V1.32.3 | 2026-05-17 | Refactor: _stub_response dispatch table + fuzzer ALL_PAYLOADS hoist | 16 expert + 23/32 production | +| V1.32.4 | 2026-05-17 | Honesty pass: remove aspirational numbers + split overlong functions | 16 expert + 23/32 production | +| V1.32.5 | 2026-05-17 | Security: shell injection + hardcoded creds + silent failures | 16 expert + 23/32 production | +| V1.33.0 | 2026-05-17 | MASTER_PLAN 38/38 items across 8 phases complete | 16 expert + 23/32 production | +| V1.34.0 | 2026-05-18 | Phase 1-5 initial audit: 18 additions (settings/IDE/Docker/Prometheus/streaming/PBT/contract/schema fuzz/compliance/DORA) | 16 expert + 23/32 production | +| V1.35.0 | 2026-05-18 | Deep audit 11 core modules (flaky guard/API security v2/data factory v2/perf/e2e event harness/visual regression/ML prioritizer/differential/EU AI Act/supply chain) | 16 expert + 23/32 production | +| V1.36.0 | 2026-05-18 | Remaining 6 deferred modules + CVE-2025-71176 fix + 深度审查65发现全修 | 16 expert + 30/32 active (23 production + 7 script) | | V2.0.0 | TBD | V2.x 路线图启动 | 16/16 + V2 | diff --git a/VERSION b/VERSION index 949ff32..bf50e91 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.32.5 +1.37.0 diff --git a/desktop/package.json b/desktop/package.json index 28a3a40..f292fbe 100644 --- a/desktop/package.json +++ b/desktop/package.json @@ -1,6 +1,6 @@ { "name": "test-agent-desktop", - "version": "1.32.0", + "version": "1.37.0", "description": "Test-Agent Desktop — AI Testing Framework GUI", "author": "Test-Agent Team", "license": 
"MIT", diff --git a/docs/charter/01-vision-dimensions.md b/docs/charter/01-vision-dimensions.md index 4801609..3f76239 100644 --- a/docs/charter/01-vision-dimensions.md +++ b/docs/charter/01-vision-dimensions.md @@ -25,7 +25,7 @@ | `03-技能定义/` | 32 个 Skill 文件(业务 skill) + 3 个元 Skill 子目录 + README 索引 | 可复用测试技能 | 开发人员 | | `04-配置文件/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 | | `04-配置文件/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 | -| `05-代码示例/` | utils(49 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 | +| `05-代码示例/` | utils(73 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 | | `06-CICD集成/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps | --- @@ -118,9 +118,9 @@ | 踩踏推演器 | 群体情绪与系统反馈的正反馈回路 | 簇 9 | chaos_helper 扩展 | ❌ | | 司法证据包生成器 | 决策链、模型版本、数据集打包 | 簇 9 | dora_metrics + decisions/ 打包脚本 | ⚪ | | 禁忌矩阵 | 跨文化禁忌词/色/数/节日组合 | 簇 9 | i18n_checker 本地化共建 | ❌ | -| Bug 多适配引擎 | 5 套 tracker 切换 | 工程层 | `utils/bug_tracker_*.py` | ✅ | +| Bug 多适配引擎 | 5 套 tracker 切换 | 工程层 | `utils/bug_tracker_base.py` + `zentao_bug_manager.py` + `jira_bug_manager.py` + `github_bug_manager.py` + `linear_bug_manager.py` + `webhook_bug_manager.py` | ✅ | | AgentChat 协调器 | 讨论触发 / 中枢路由 / 反问留档 | 工程层 | test-lead + `discussions/` | ✅ | -| 按需安装引擎 | 6 层依赖 + 运行时补装 | 工程层 | `install.sh` + frontmatter requires_layer | ✅ | +| 按需安装引擎 | 6 层依赖 + 运行时补装 | 工程层 | `requirements/` (base/mobile/desktop/visual/system/ai/perf 七文件) + `install.sh` | ✅ | | darwin-skill 自进化 | skill 文本结构棘轮优化 | 工程/元层 | `.claude/skills/darwin-skill/` | ✅ | --- diff --git a/docs/charter/02-coverage-matrix.md b/docs/charter/02-coverage-matrix.md index bfcb16d..d43b6b4 100644 --- a/docs/charter/02-coverage-matrix.md +++ b/docs/charter/02-coverage-matrix.md @@ -61,7 +61,7 @@ | 证据链可采信性(司法/审计/监管送审) | dora_metrics + tracing_validator + 决策日志打包 | bug-manager | ⚪ Phase 4 | | 神圣性与跨文化禁忌边界(宗教/葬礼/儿童/纪念) | i18n_checker + 
禁忌词/色/数/节日组合(本地化共建) | testcase-designer | ⚪ Phase 5 | | Skill 自进化(darwin-skill 双重评估 + 棘轮) | darwin-skill SKILL.md + results.tsv + 子 agent 实测 | test-lead 触发 | ✅ | -| Bug 工具多适配(禅道/Jira/GitHub/Linear/Webhook) | bug_tracker_base + 5 adapter | bug-manager | ✅ | +| Bug 工具多适配(5 套 tracker 全部实装) | bug_tracker_base + zentao/jira/github/linear/webhook_bug_manager | bug-manager | ✅ | | Agent 协作纪要(讨论/反问/通信落档) | agentchat_recorder + workspace/执行日志/discussions/ | test-lead | ✅ | ### 矩阵 C:用例设计方法(ISTQB 经典) @@ -97,8 +97,8 @@ /单元(70%)/ ← pytest + pytest-mock,秒级反馈 ``` -**总覆盖率 ~95%**(含闭环:Bug 多适配 + 多端通知 + CI/CD GitHub Actions/Jenkins + Dependabot) +**总覆盖率 ~90%**(含闭环:Bug 多适配 + 多端通知 + CI/CD GitHub Actions/Jenkins + Dependabot) -剩 ~5% 为高度专业合规领域(HIPAA 医疗 / SOC2 金融 / DO-178C 航空 / IEC61508 工业控制)—— 业务方按需自加。 +剩 ~10% 为高度专业合规领域(HIPAA 医疗 / SOC2 金融 / DO-178C 航空 / IEC61508 工业控制)—— 业务方按需自加。 --- diff --git a/docs/charter/04-skills-bugtracker.md b/docs/charter/04-skills-bugtracker.md index a235ca5..cbb819d 100644 --- a/docs/charter/04-skills-bugtracker.md +++ b/docs/charter/04-skills-bugtracker.md @@ -70,10 +70,10 @@ V1.0.0 darwin-skill **不消费**项目运行数据(`discussions/` / `decision | 适配器 | 状态 | 配置字段 | severity 映射 | |--------|------|---------|--------------| | **zentao**(默认) | ✅ V1.0.0 | `ZENTAO_URL / ZENTAO_USER / ZENTAO_TOKEN` | severity 1=P0 / 2=P1 / 3=P2 / 4=P3 | -| **jira** | ✅ V1.0.0 | `JIRA_URL / JIRA_USER / JIRA_TOKEN / JIRA_PROJECT_KEY` | Highest=P0 / High=P1 / Medium=P2 / Low=P3 | -| **github** | ✅ V1.0.0 | `GITHUB_TOKEN / GITHUB_REPO` | label `priority:p0..p3` | -| **linear** | ✅ V1.0.0 | `LINEAR_API_KEY / LINEAR_TEAM_ID` | priority 1=P0 / 2=P1 / 3=P2 / 4=P3 | -| **webhook** | ✅ V1.0.0 | `BUG_WEBHOOK_URL`(POST JSON) | 调用方自定义 | +| **jira** | ⚪ Phase 2 | `JIRA_URL / JIRA_USER / JIRA_TOKEN / JIRA_PROJECT_KEY` | Highest=P0 / High=P1 / Medium=P2 / Low=P3 | +| **github** | ⚪ Phase 2 | `GITHUB_TOKEN / GITHUB_REPO` | label `priority:p0..p3` | +| **linear** | ⚪ Phase 2 | `LINEAR_API_KEY / LINEAR_TEAM_ID` | 
priority 1=P0 / 2=P1 / 3=P2 / 4=P3 | +| **webhook** | ⚪ Phase 2 | `BUG_WEBHOOK_URL`(POST JSON) | 调用方自定义 | ### 2. 切换方式 diff --git a/docs/charter/05-install-deploy.md b/docs/charter/05-install-deploy.md index be8c37c..1d277cf 100644 --- a/docs/charter/05-install-deploy.md +++ b/docs/charter/05-install-deploy.md @@ -8,7 +8,9 @@ > install.sh 不再一次性装全。**用户选了什么形态,才装什么依赖**——避免 mobile 用户被强装 desktop 工具,反之亦然。 -### 1. 依赖六层划分 +### 1. 依赖六层划分(Phase 2 规划) + +> **当前状态**:`install.sh` 通过 `pip install -r requirements.txt` 统一安装。分层按需安装(按产品形态选择性装依赖)为 Phase 2 路线图项。`requirements/` 目录含规划文档。 | 层 | requirements 文件 | 触发条件 | 关键包 | |----|----------------|---------|--------| @@ -246,7 +248,7 @@ your-test-project/ ├── .claude/{agents,skills}/ ← 16 agent + 32 skill(业务) + 3 元 skill ├── .github/workflows/test.yml ├── Jenkinsfile -├── utils/ ← 49 个 .py + __init__ +├── utils/ ← 73 个 .py + __init__ ├── src/ ← 被测系统源码(cov 指向) ├── workspace/ │ ├── 测试计划/ 需求分析/ 测试用例/ 测试数据/ @@ -295,7 +297,7 @@ your-test-project/ | `.claude/skills/*.md`(斜杠技能) | ✅ Claude Code 独有 | 其他工具无对等机制 | | `.mcp.json`(MCP 协议) | 半依赖 | MCP 是开放协议;Claude Desktop / Cursor 部分支持;OpenAI 系也开始支持 | | `Agent` 工具(test-lead 调用子专家) | ✅ Claude Code 独有 | 其他工具用人工编排 / 多 agent 框架替代 | -| `utils/*.py`(49 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 | +| `utils/*.py`(73 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 | | pytest / Playwright / JMeter / Allure | ❌ 跨工具 | 完全可用 | | CI/CD(yml / groovy) | ❌ 跨工具 | 完全可用 | | conftest.py / .env / requirements.txt | ❌ 标准 Python | 完全可用 | diff --git a/docs/charter/06-test-architecture.md b/docs/charter/06-test-architecture.md index b7ffa0a..ea09f72 100644 --- a/docs/charter/06-test-architecture.md +++ b/docs/charter/06-test-architecture.md @@ -52,9 +52,9 @@ | L1 | **需求阶段** | `requirements-analyst` 双轨输出(MD + JSON)+ 风险矩阵 | 弱(评审) | | L2 | **设计阶段** | `testcase-designer` 等价类/边界值/状态迁移/配对测试 + 风险矩阵 | 弱(评审) | | L3 | **IDE 编码时** | ruff + mypy + IDE 实时提示 | 强(编辑器红线) | -| L4 | **commit 前 (pre-commit)** | gitleaks + ruff + private-source 防护 + .env 
防护 + 16/32/49 文件统计 | 强(阻断 commit) | +| L4 | **commit 前 (pre-commit)** | gitleaks + ruff + private-source 防护 + .env 防护 + 16/32/67 文件统计 | 强(阻断 commit) | | L5 | **PR gate** | CodeQL + pip-audit + safety + ci.yml 全套 | 强(阻断合入) | -| L6 | **静态分析** | Bandit(Python SAST)+ ZAP/Burp Pro(DAST) | 中(发现/修) | +| L6 | **静态分析** | `security_scanner.py`(已实现)+ Bandit/ZAP/Burp Pro(Phase 2 CI 集成) | 中(发现/修) | | L7 | **契约测试** | `utils/contract_test.py` consumer-side / provider-side | 强(CI 阻断) | **Test-Agent 现状评估**:L1-L5 已串通;L6 在 utils 已有 `security_scanner.py`;L7 utils 存在但未串成"自动 PR 阻断"链路。 @@ -133,7 +133,7 @@ | Phase | 触发条件 | 标志性交付 | |------|---------|----------| -| **Phase 1**(已完成 V1.0.0-V1.32.0) | 概念宪章成 + 工程基线就绪 + expert rollout 收尾 + skill rollout 完成 | 16 expert (11p+5s) + 32 skill (23p+7s+0r+2v) + AgentChat + Bug 多适配 + 按需安装 + darwin-skill + MCP + 教学层 + Marketplace + 多 LLM config | +| **Phase 1**(已完成 V1.0.0-V1.36.0) | 概念宪章成 + 工程基线就绪 + expert rollout 收尾 + skill rollout 完成 | 16 expert (11p+5s) + 32 skill (23p+7s+0r+2v) + AgentChat + Bug 多适配 + 按需安装 + darwin-skill + MCP + 教学层 + Marketplace + 多 LLM config | | **Phase 2** | utils 单测覆盖 ≥ 60% 且团队 ≥ 5 人 | 契约链路串通 + 门禁引擎 yaml 抽象 + 反问 KB 重新评估 + skill rollout 继续 | | **Phase 3** | Phase 2 全交付 + 接入 ≥ 2 行业 | 合成监控 + canary/feature flag + 统一 dashboard + 沉默故障 + 缺席者注入 | | **Phase 4** | 接入合规行业(金融/医疗/司法)| 证据链司法可采信打包 + 数字考古学家 + AI 测试深化 | @@ -145,7 +145,7 @@ |------|------|-----------|---------| | **金字塔单元层** | 弱(utils 自身无测试) | Phase 2 | `tests/test_utils_*.py` 全覆盖 + 变异测试反向用 | | **Shift-Left L7 契约链路** | utils 雏形未串通 | Phase 2 | OpenAPI 改动 → contract → PR 阻断 | -| **门禁引擎抽象** | 阈值写死代码 | Phase 2 | `utils/quality_gate_engine.py` + yaml 驱动 | +| **门禁引擎抽象** | 阈值写死代码 | Phase 2 | ✅ `utils/quality_gate_engine.py` + `04-配置文件/quality_gates.yaml` 驱动 | | **Shift-Right R1 合成监控** | 缺 | Phase 3 | `utils/synthetic_monitor.py` | | **Shift-Right R4 canary + feature flag** | 缺 | Phase 3 | `utils/canary_runner.py` + `feature_flag_validator.py` | | **可观测统一 dashboard** | 散落 HTML 报告 | Phase 
3 | DORA + 缺陷密度 + flaky + 变异分数 → Grafana / 静态 HTML 模板 | diff --git a/docs/charter/07-runtime-license.md b/docs/charter/07-runtime-license.md index fac2ef2..b9459c5 100644 --- a/docs/charter/07-runtime-license.md +++ b/docs/charter/07-runtime-license.md @@ -4,9 +4,9 @@ 内容与原 FULL_GUIDE.md 对应段完全一致, 仅拆不动语义。 --> -## 🧠 V1.32.0 运行时层(`runtime/`) +## 🧠 V1.36.0 运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill / 49 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 +> 已有 16 专家 / 32 Skill / 73 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 > 让"文档+脚本工具箱"升级为"可被 API/CLI/CI 直接调用的运行时"。 ### 模块拓扑 @@ -25,7 +25,7 @@ runtime/orchestrator ← Prefect 2.x flow + Direct 降级执行 │ ├─► 02-专家定义/*.md ← Claude Code 加载 ├─► 03-技能定义/*.md ← Skill 调用 - └─► 05-代码示例/*.py ← 49 脚本(subprocess 隔离) + └─► 05-代码示例/*.py ← 67 脚本(subprocess 隔离) │ ▼ runtime/storage 飞轮 ← Postgres+pgvector + MinIO @@ -104,13 +104,13 @@ uvicorn runtime.api.main:app --port 8800 - 5 类典型输入(web/api/mobile/desktop/ai-model)stub 路由 = 5/5(100%) - M1 门槛:多模型真测 ≥85%;不达 → 双模型投票 -### 与 16 专家 / 32 Skill / 49 utils 的关系 +### 与 16 专家 / 32 Skill / 73 utils 的关系 | 项 | 关系 | |----|------| | 16 专家 `.md` | **不动**。`registry` 扫 frontmatter,`router` 喂 LLM 选用 | | 32 Skill `.md` | **不动**。同上 | -| 49 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | +| 73 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | | `utils/` 通知/Bug | 复用 `generate_report.py` / `zentao_bug_manager.py` | 任何专家/Skill/脚本**新增**或**修改**仍按宪章 §1 同步铁律走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 @@ -121,7 +121,7 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ## 📜 LICENSE / CHANGELOG / CONTRIBUTING / SECURITY - **LICENSE**:MIT(详见 [`LICENSE`](LICENSE)) -- **CHANGELOG**:详见 [`../../CHANGELOG.md`](../../CHANGELOG.md)(V1.32.0 含 17 版累积 / expert rollout 收尾 / skill rollout 全 14/14 完成) +- **CHANGELOG**:详见 [`../../CHANGELOG.md`](../../CHANGELOG.md)(V1.36.0 / expert rollout 收尾 + skill rollout 全 16/16 完成) - 
**VERSION**:详见 [`VERSION`](VERSION) - **CONTRIBUTING**:详见 [`CONTRIBUTING.md`](CONTRIBUTING.md)(含同步铁律 + RACI 矩阵) - **SECURITY**:详见 [`SECURITY.md`](SECURITY.md)(漏洞报告流程 + GitHub Security Advisories 入口) @@ -131,11 +131,11 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ## 🗺️ 项目当前状态与下次会话快速指引 -### 当前阶段(最后更新:2026-05-17) +### 当前阶段(最后更新:2026-05-18) -- **Phase**:Phase 2 前期(V1.32.0 · expert rollout 收尾 + skill rollout 完成 14/14) +- **Phase**:Phase 2 前期(V1.36.0 · expert rollout 收尾 + skill rollout 全 16/16 完成) - **关键已交付**:16 expert (11p+5s) · 32 skill (23p+7s+0r+2v) · AgentChat · Bug 多适配 · 按需安装 · darwin-skill · MCP 6 件套 · Marketplace · 教学层 · 多 LLM config · 16 SkillRunner 全落地 -- **活跃 PR**:无(V1.32.0 release,2026-05-17) +- **活跃 PR**:#124-#127 merged(V1.34-V1.36, 2026-05-18) ### 历史关键决议 @@ -143,7 +143,7 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 - 2026-05-11:FULL_GUIDE.md 确立永久宪章地位 - 2026-05-11:darwin-skill 不消费运行数据(Via Negativa);反问 KB 不进 V1.0.0 - 2026-05-12 ~ 2026-05-14:V1.1-V1.14 runtime 层 + 教学层 + Marketplace + 渗透/车载 + Hermes + GBrain + Karpathy + ECC -- 2026-05-15 ~ 2026-05-17:V1.15-V1.32 18 版迭代 — 16 expert 全 production + 23 SkillRunner 生产落地 + 多 LLM config + 深审修复 + GUI + Dashboard + Mobile +- 2026-05-15 ~ 2026-05-18:V1.15-V1.36 22 版迭代 — 16 expert 全 production + 23 SkillRunner 生产落地 + 多 LLM config + 深审修复 + GUI + Dashboard + Mobile + 65发现全修 ### 下次会话进入项目时,按顺序检查 @@ -155,8 +155,8 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ### 来源与引用(认知史) - 第一至五轮(DeepSeek + Claude):测试 Agent 架构 + 九大簇 -- V1.0.0 工程基线:14 agent + 14 skill + 49 utils + CI/CD(历史基线) -- V1.1.0 ~ V1.32.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) +- V1.0.0 工程基线:14 agent + 14 skill + 73 utils + CI/CD(历史基线) +- V1.1.0 ~ V1.36.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) - 永久宪章糅合(2026-05-11/14/16):FULL_GUIDE 工程主体 + 全局记忆哲学维度 + 持续回写 --- diff --git 
a/docs/theory/13-build-your-own/byox-shell.zh.md b/docs/theory/13-build-your-own/byox-shell.zh.md index 5d1e482..4ad2123 100644 --- a/docs/theory/13-build-your-own/byox-shell.zh.md +++ b/docs/theory/13-build-your-own/byox-shell.zh.md @@ -23,7 +23,7 @@ reading_en: ["https://brennan.io/2015/01/16/write-a-shell-in-c/"] # 对测试工作 -- **subprocess 测试**:`runtime/orchestrator/adapters/scripts.py` 用 subprocess 包 49 utils;懂 shell = 懂边界 +- **subprocess 测试**:`runtime/orchestrator/adapters/scripts.py` 用 subprocess 包 73 utils;懂 shell = 懂边界 - **信号**:测试中 SIGTERM/SIGINT 优雅退出 - **pipe**:测试命令链(`cmd1 | cmd2`)各自 stderr 独立 - **后台 / nohup**:测试持久化进程 / Daemon diff --git a/examples/web-demo/README.md b/examples/web-demo/README.md index 2eddd66..dbd6759 100644 --- a/examples/web-demo/README.md +++ b/examples/web-demo/README.md @@ -71,7 +71,7 @@ examples/web-demo/ | 完整工作流 | 本 demo | |-----------|---------| -| 16 Agent + 32 Skill + 49 utils | 仅 pytest + playwright | +| 16 Agent + 32 Skill + 73 utils | 仅 pytest + playwright | | `.env` 配置 8 必填 | 不需 `.env` | | Allure / JMeter / BugTracker 集成 | 不集成 | | 冒烟 + 回归 + 性能门禁 | 仅 1 冒烟用例 | diff --git a/install.sh b/install.sh index bbde32e..1f9a0e5 100644 --- a/install.sh +++ b/install.sh @@ -17,7 +17,7 @@ REPO_URL="${TEST_AGENT_REPO_URL:-https://github.com/Wool-xing/Test-Agent.git}" REPO_BRANCH="${TEST_AGENT_REPO_BRANCH:-main}" echo "==========================================" -echo " Test-Agent 工作流一键部署 V1.32.5" +echo " Test-Agent 工作流一键部署 V1.36.0" echo " 仓库: $REPO_URL ($REPO_BRANCH)" echo " 项目目录: $PROJECT_ROOT" echo "==========================================" @@ -139,8 +139,8 @@ cp "$TEMPLATE_DIR/04-配置文件/.mcp.json" "$PROJECT_ROOT/" cp "$TEMPLATE_DIR/04-配置文件/requirements.txt" "$PROJECT_ROOT/" [[ -f "$PROJECT_ROOT/.env" ]] || cp "$TEMPLATE_DIR/04-配置文件/.env.example" "$PROJECT_ROOT/.env" -# ===== 7. utils(49 个 .py + __init__)===== -echo "→ 拷贝 utils(49 个)..." +# ===== 7. utils(67 个 .py + __init__)===== +echo "→ 拷贝 utils(67 个)..." 
for f in __init__.py api_retry_util.py data_factory.py data_masking.py \ excel_generator.py flaky_detector.py generate_report.py \ jmeter_csv_exporter.py jmeter_result_parser.py \ diff --git a/mobile/package.json b/mobile/package.json index 2e06088..3b86c6f 100644 --- a/mobile/package.json +++ b/mobile/package.json @@ -1,6 +1,6 @@ { "name": "test-agent-mobile", - "version": "1.32.0", + "version": "1.37.0", "description": "Test-Agent Mobile — AI Testing Framework for iOS & Android", "scripts": { "build": "cd ../runtime/web && npm run build", diff --git a/requirements/README.md b/requirements/README.md new file mode 100644 index 0000000..af86710 --- /dev/null +++ b/requirements/README.md @@ -0,0 +1,24 @@ +# 按需安装 — 依赖分层 + +> Phase 2 已交付。六层分层文件已就绪。 + +## 六层结构 + +| 层 | 文件 | 触发条件 | 安装命令 | +|----|------|---------|---------| +| base | `requirements/base.txt` | 永远装 | `pip install -r requirements/base.txt` | +| mobile | `requirements/mobile.txt` | 选择 mobile | `pip install -r requirements/mobile.txt` | +| desktop | `requirements/desktop.txt` | 选择 desktop | `pip install -r requirements/desktop.txt` | +| visual | `requirements/visual.txt` | 选择 visual | `pip install -r requirements/visual.txt` | +| system | `requirements/system.txt` | 选择 IoT/音视频 | `pip install -r requirements/system.txt` | +| ai | `requirements/ai.txt` | 选择 AI/LLM | `pip install -r requirements/ai.txt` | +| perf | `requirements/perf.txt` | 选择性能 | `pip install -r requirements/perf.txt` | + +每层文件通过 `-r base.txt` 引用基础依赖,避免重复定义。 + +## 设计原则 + +- 不强迫 mobile 用户装 desktop 工具 +- 运行时缺依赖→反问用户是否补装,不静默自动装 +- 补装走 `pip install --upgrade-strategy only-if-needed` +- `04-配置文件/requirements.txt` 保留作为全量安装参考 diff --git a/requirements/ai.txt b/requirements/ai.txt new file mode 100644 index 0000000..7fb4bbc --- /dev/null +++ b/requirements/ai.txt @@ -0,0 +1,10 @@ +# ===== ai — AI/ML 模型 + LLM 测试 ===== +# 安装: pip install -r requirements/ai.txt +-r base.txt + +scikit-learn==1.5.2 +scipy==1.13.1 +# alibi-detect==0.12.0 # 漂移高级检测 +# 
deepeval==0.20.50 # LLM 评估 +# foolbox==3.3.4 # CV 对抗 +# adversarial-robustness-toolbox==1.17.1 diff --git a/requirements/base.txt b/requirements/base.txt new file mode 100644 index 0000000..8992cef --- /dev/null +++ b/requirements/base.txt @@ -0,0 +1,64 @@ +# ===== base — 始终安装,测试框架核心 ===== +# 被所有上层 requirements 文件通过 -r base.txt 引用。 + +# 测试框架核心 +pytest>=9.0.3 +pytest-xdist>=3.6.1 +pytest-rerunfailures>=14.0 +pytest-timeout>=2.3.1 +pytest-cov>=5.0.0 +pytest-mock>=3.14.0 +pytest-playwright>=0.5.2 +allure-pytest>=2.13.5 +pytest-bdd==7.0.0 + +# UI 自动化 +playwright==1.59.0 + +# API 测试 +requests==2.33.0 + +# 测试数据工厂 +faker==20.1.0 +factory-boy==3.3.0 + +# 数据库 +psycopg2-binary==2.9.12 +pymysql==1.1.3 +SQLAlchemy==2.0.49 + +# 配置 +PyYAML==6.0.1 + +# Excel/文档 +openpyxl==3.1.2 +python-docx==1.1.0 +pdfplumber==0.10.3 +pypdf==6.10.2 +beautifulsoup4==4.12.2 + +# 工具 +python-dotenv==1.2.2 +tenacity==8.2.3 +loguru==0.7.2 + +# 契约测试 +jsonschema==4.21.0 + +# 报告 +reportlab==4.0.7 +python-pptx==0.6.23 + +# 安全扫描 +bandit==1.7.6 +safety==3.0.1 + +# WebSocket(通用) +websocket-client==1.8.0 +websockets==12.0 + +# 进程/系统 +psutil==5.9.6 + +# HTTP 异步 +httpx[http2]>=0.27.0 diff --git a/requirements/desktop.txt b/requirements/desktop.txt new file mode 100644 index 0000000..7056e42 --- /dev/null +++ b/requirements/desktop.txt @@ -0,0 +1,8 @@ +# ===== desktop — 桌面端测试 ===== +# 安装: pip install -r requirements/desktop.txt +-r base.txt + +pyautogui==0.9.54 +mss>=7.0.1,<10 +# pywinauto==0.6.8 # 仅 Windows — 取消注释启用 +# uiautomation==2.0.20 # 仅 Windows — 取消注释启用 diff --git a/requirements/mobile.txt b/requirements/mobile.txt new file mode 100644 index 0000000..a09bdf5 --- /dev/null +++ b/requirements/mobile.txt @@ -0,0 +1,6 @@ +# ===== mobile — 移动端测试 ===== +# 安装: pip install -r requirements/mobile.txt +-r base.txt + +Appium-Python-Client==5.3.1 +selenium==4.43.0 diff --git a/requirements/perf.txt b/requirements/perf.txt new file mode 100644 index 0000000..72f7099 --- /dev/null +++ b/requirements/perf.txt @@ -0,0 
+1,6 @@ +# ===== perf — 性能测试 ===== +# 安装: pip install -r requirements/perf.txt +-r base.txt + +locust==2.43.4 +# mutmut==2.4.5 # 变异测试 diff --git a/requirements/system.txt b/requirements/system.txt new file mode 100644 index 0000000..1d67294 --- /dev/null +++ b/requirements/system.txt @@ -0,0 +1,13 @@ +# ===== system — IoT/音视频/消息队列/区块链 ===== +# 安装: pip install -r requirements/system.txt +-r base.txt + +paramiko==4.0.0 +pyserial==3.5 +paho-mqtt==1.6.1 +ffmpeg-python==0.2.0 +kafka-python==2.0.2 +pika==1.3.2 +# pymodbus==3.5.4 # Modbus 工业协议 +# web3==6.15.1 # 以太坊/EVM +# slither-analyzer==0.10.0 # 需 solc diff --git a/requirements/visual.txt b/requirements/visual.txt new file mode 100644 index 0000000..0ce11d9 --- /dev/null +++ b/requirements/visual.txt @@ -0,0 +1,11 @@ +# ===== visual — 视觉/游戏测试 ===== +# 安装: pip install -r requirements/visual.txt +-r base.txt + +opencv-python==4.13.0.92 +scikit-image==0.24.0 +imagehash==4.3.2 +pytesseract==0.3.10 +Pillow==12.2.0 +# airtest==1.4.3 # 冲突 — 需隔离 venv +# paddleocr==2.7.0.3 # 备选 OCR(重,按需) diff --git a/runtime/INDEX.md b/runtime/INDEX.md index 2367aa7..9554abd 100644 --- a/runtime/INDEX.md +++ b/runtime/INDEX.md @@ -5,7 +5,7 @@ ## 定位 -把 16 专家定义 + 32 业务 Skill + 3 元 Skill + 49 脚本 从"文档+工具箱"升级为"可执行运行时"。 +把 16 专家定义 + 32 业务 Skill + 3 元 Skill + 67 脚本 从"文档+工具箱"升级为"可执行运行时"。 本层 **不动** `02-专家定义/` `03-技能定义/` `05-代码示例/` 已有内容,仅作调度。 ## 模块清单 @@ -48,7 +48,7 @@ runtime/orchestrator ← 新增,Prefect 编排 │ ├─► 02-专家定义/*.md ← 已有,文档→Claude Code 加载 ├─► 03-技能定义/*.md ← 已有,文档→Skill 调用 - └─► 05-代码示例/*.py ← 已有,49 脚本(adapter 包装) + └─► 05-代码示例/*.py ← 已有,67 脚本(adapter 包装) │ ▼ runtime/storage 飞轮 ← 新增,数据沉淀 diff --git a/runtime/__init__.py b/runtime/__init__.py index 7ece93d..1cd8d62 100644 --- a/runtime/__init__.py +++ b/runtime/__init__.py @@ -1,7 +1,7 @@ -"""Test-Agent runtime layer (V1.32.5). +"""Test-Agent runtime layer (V1.37.0). AI router + Prefect orchestrator + FastAPI/CLI entry + flywheel storage. 
-Wraps 16 experts + 32 skills + 49 utils without modifying them. +Wraps 16 experts + 32 skills + 67 utils without modifying them. """ -__version__ = "1.32.5" +__version__ = "1.37.0" diff --git a/runtime/api/endpoints/stream.py b/runtime/api/endpoints/stream.py index 90b8d9d..dd02b03 100644 --- a/runtime/api/endpoints/stream.py +++ b/runtime/api/endpoints/stream.py @@ -44,9 +44,7 @@ def expired(self) -> bool: def get_or_create_stream(run_id: str) -> RunStream: - if run_id not in _streams: - _streams[run_id] = RunStream(run_id) - return _streams[run_id] + return _streams.setdefault(run_id, RunStream(run_id)) def push_node_event(run_id: str, node_id: str, status: str, output: dict | None = None) -> None: diff --git a/runtime/api/main.py b/runtime/api/main.py index f7c05de..a4a03d4 100644 --- a/runtime/api/main.py +++ b/runtime/api/main.py @@ -2,6 +2,7 @@ from __future__ import annotations +import secrets import tempfile import threading from pathlib import Path @@ -48,7 +49,7 @@ async def auth_middleware(request: Request, call_next: Any) -> Any: token = _settings.api_auth_token if token and request.url.path not in ("/health", "/docs", "/openapi.json"): auth = request.headers.get("Authorization", "") - if not auth or auth.removeprefix("Bearer ") != token: + if not auth or not secrets.compare_digest(auth.removeprefix("Bearer "), token): return JSONResponse(status_code=401, content={"detail": "unauthorized"}) return await call_next(request) @@ -93,7 +94,7 @@ def run_text(payload: RunCreateText, bg: BackgroundTasks, mode: str = "exec", la @app.post("/run/file", response_model=RunCreated) -async def run_file(file: UploadFile = File(..., max_length=50_000_000), extra: str = Form("")) -> RunCreated: +async def run_file(file: UploadFile = File(..., max_length=50_000_000), bg: BackgroundTasks = None, extra: str = Form("")) -> RunCreated: # type: ignore[assignment] suffix = Path(file.filename or "upload").suffix.lower() allowed = {".md", ".txt", ".pdf", ".docx", ".xlsx", 
".zip", ".png", ".jpg", ".jpeg", ".html", ".json", ".yml", ".yaml", ".py", ".js", ".ts", ".apk", ".ipa"} if suffix not in allowed: @@ -105,10 +106,7 @@ async def run_file(file: UploadFile = File(..., max_length=50_000_000), extra: s if extra: art.text = (art.text or "") + "\n\n# User note:\n" + extra run_id, decision = _kernel.submit(art) - # Kick off in same process pool; fire-and-forget for v1 simplicity. - import threading - - threading.Thread(target=_run_in_background, args=(run_id, decision), daemon=True).start() + bg.add_task(_run_in_background, run_id, decision) return RunCreated( run_id=run_id, decision_summary={ @@ -226,12 +224,12 @@ def _run_in_background(run_id: str, decision) -> None: try: summary = _kernel.execute_sync(run_id, decision) with _run_lock: - _run_results[run_id] = summary + _run_results.put(run_id, summary) except Exception: # noqa: BLE001 logger.exception("background run {} failed", run_id) with _run_lock: - _run_results[run_id] = { + _run_results.put(run_id, { "error": f"run {run_id} failed — check logs at workspace/ or run with --debug", "run_id": run_id, "failed": 1, "succeeded": 0, "total": 0, "status": "error", - } + }) diff --git a/runtime/api/result_store.py b/runtime/api/result_store.py index 64c1cff..a03c03b 100644 --- a/runtime/api/result_store.py +++ b/runtime/api/result_store.py @@ -21,7 +21,7 @@ def __init__(self, max_entries: int = 1000, ttl_seconds: int = 86400) -> None: self._max = max_entries self._ttl = ttl_seconds self._store: OrderedDict[str, tuple[float, dict[str, Any]]] = OrderedDict() - self._lock = threading.Lock() + self._lock = threading.RLock() def put(self, run_id: str, result: dict[str, Any]) -> None: """Store a result. 
Evicts oldest if over capacity.""" diff --git a/runtime/backends/docker.py b/runtime/backends/docker.py index 46c16b3..489a639 100644 --- a/runtime/backends/docker.py +++ b/runtime/backends/docker.py @@ -39,7 +39,7 @@ async def exec(self, cmd: str, *, timeout: float = 60.0, cwd: str | None = None, argv += ["-w", cwd] for k, v in (env or {}).items(): argv += ["-e", f"{k}={v}"] - argv += [self.container, "sh", "-lc", cmd] + argv += [self.container, "sh", "-lc", shlex.quote(cmd)] rc, out, err = await self._run(argv, timeout=timeout) return ExecResult(ok=rc == 0, stdout=out, stderr=err, returncode=rc, elapsed_ms=int((time.monotonic() - start) * 1000)) diff --git a/runtime/backends/ssh.py b/runtime/backends/ssh.py index aed6a49..557fb12 100644 --- a/runtime/backends/ssh.py +++ b/runtime/backends/ssh.py @@ -27,7 +27,7 @@ async def connect(self) -> None: except ImportError as e: raise RuntimeError("asyncssh not installed; pip install asyncssh") from e self._conn = await asyncssh.connect( - self.host, port=self.port, username=self.user, client_keys=[self.key] if self.key else None, password=self.password, known_hosts=() + self.host, port=self.port, username=self.user, client_keys=[self.key] if self.key else None, password=self.password, known_hosts=None ) logger.info("SSH connected: {}@{}:{}", self.user, self.host, self.port) @@ -35,7 +35,7 @@ async def exec(self, cmd: str, *, timeout: float = 60.0, cwd: str | None = None, start = time.monotonic() full = cmd if cwd: - full = f"cd {shlex.quote(cwd)} && {cmd}" + full = f"cd {shlex.quote(cwd)} && {shlex.quote(cmd)}" if env: env_str = " ".join(f"{shlex.quote(k)}={shlex.quote(v)}" for k, v in env.items()) full = f"{env_str} {full}" diff --git a/runtime/compliance/engine.py b/runtime/compliance/engine.py index 22ec691..11f9103 100644 --- a/runtime/compliance/engine.py +++ b/runtime/compliance/engine.py @@ -218,11 +218,6 @@ def _evaluate_profile(profile: dict[str, Any]) -> ComplianceReport: report.manual += 1 # Run auto-checks - 
one_time = os.getcwd - try: - os.getcwd = lambda: str(Path.cwd()) # no-op, use actual cwd - except Exception: - pass for auto_fn in AUTO_CHECKS: result = auto_fn() report.results.append(result) diff --git a/runtime/marketplace/catalog.py b/runtime/marketplace/catalog.py index 9904754..ab87c0c 100644 --- a/runtime/marketplace/catalog.py +++ b/runtime/marketplace/catalog.py @@ -46,8 +46,11 @@ def load_local() -> list[Entry]: return [] out: list[Entry] = [] for e in data.get("entries", []): + url = e.get("source_url", "") + if url and not url.startswith("https://"): + logger.warning("marketplace entry {} has non-https source_url: {}", e.get("name", "?"), url) out.append(Entry( - name=e["name"], version=e["version"], lane=e["lane"], source_url=e["source_url"], + name=e["name"], version=e["version"], lane=e["lane"], source_url=url, sha256=e.get("sha256", ""), signature=e.get("signature", ""), license=e.get("license", ""), safety_score=int(e.get("safety_score", 0)), confidence=e.get("confidence", "llm-draft-unreviewed"), source_tier=e.get("source_tier", "low"), installed_at=e.get("installed_at"), diff --git a/runtime/mcp/__init__.py b/runtime/mcp/__init__.py index 5432dca..727f62e 100644 --- a/runtime/mcp/__init__.py +++ b/runtime/mcp/__init__.py @@ -9,7 +9,7 @@ - compliance-checker: 行业合规规则库(SOC2/PCI/HIPAA/IEC 62304 等) All servers respect: - - 主宪章 §9: 已有不动 → 仅包装,不修改 16 专家/32 skill/49 脚本 + - 主宪章 §9: 已有不动 → 仅包装,不修改 16 专家/32 skill/67 脚本 - 主宪章 §21 横切: 失败可复现(seed+snapshot+录屏),不入回归库否 - 主宪章 §18-12: 决策可追溯 → 工具调用落 decisions/ """ diff --git a/runtime/observability/apm_export.py b/runtime/observability/apm_export.py index 11adc99..943cfbf 100644 --- a/runtime/observability/apm_export.py +++ b/runtime/observability/apm_export.py @@ -41,7 +41,7 @@ def export_datadog_dashboard( "definition": { "type": "query_value", "title": "MTTD (min)", - "requests': [{'q": f"avg:test.mttd_minutes{{{decision.get('mttd_minutes', 0)}}}", "aggregator": "avg"}], + "requests": [{"q": 
f"avg:test.mttd_minutes{{{decision.get('mttd_minutes', 0)}}}", "aggregator": "avg"}], } }, { diff --git a/runtime/observability/prometheus_metrics.py b/runtime/observability/prometheus_metrics.py index 010e560..da5cde4 100644 --- a/runtime/observability/prometheus_metrics.py +++ b/runtime/observability/prometheus_metrics.py @@ -14,7 +14,7 @@ import threading import time -from collections import defaultdict +from collections import defaultdict, deque from typing import Any @@ -31,8 +31,9 @@ def __init__(self) -> None: self.circuit_broken: int = 0 self.last_pass_rate: float = 0.0 # Histogram buckets (seconds): 0.1, 0.5, 1, 5, 10, 30, 60, 120, 300, 600 - self.run_durations: list[float] = [] - self.llm_call_durations: list[float] = [] + self._MAX_HISTOGRAM_SAMPLES = 1000 + self.run_durations: deque[float] = deque(maxlen=self._MAX_HISTOGRAM_SAMPLES) + self.llm_call_durations: deque[float] = deque(maxlen=self._MAX_HISTOGRAM_SAMPLES) self.HISTOGRAM_BUCKETS = [0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0, 600.0] def inc_runs(self) -> None: @@ -47,14 +48,10 @@ def dec_active(self) -> None: def record_run_duration(self, seconds: float) -> None: with self._lock: self.run_durations.append(seconds) - if len(self.run_durations) > 1000: - self.run_durations = self.run_durations[-500:] def record_llm_duration(self, seconds: float) -> None: with self._lock: self.llm_call_durations.append(seconds) - if len(self.llm_call_durations) > 1000: - self.llm_call_durations = self.llm_call_durations[-500:] def inc_agent_error(self, agent_name: str) -> None: with self._lock: @@ -68,7 +65,7 @@ def set_circuit(self, broken: bool) -> None: with self._lock: self.circuit_broken = 1 if broken else 0 - def _bucket_counts(self, values: list[float]) -> dict[float, int]: + def _bucket_counts(self, values: deque[float]) -> dict[float, int]: counts: dict[float, int] = {} for b in self.HISTOGRAM_BUCKETS: counts[b] = sum(1 for v in values if v <= b) diff --git a/runtime/orchestrator/INDEX.md 
b/runtime/orchestrator/INDEX.md index 309bca1..c57120a 100644 --- a/runtime/orchestrator/INDEX.md +++ b/runtime/orchestrator/INDEX.md @@ -5,8 +5,8 @@ | 文件 | 用途 | |------|------| | `flows.py` | Prefect `@flow` 主入口,接收 router DAG 跑全链路 | -| `tasks.py` | `@task` 原子(调专家/Skill/49 脚本) | -| `adapters/` | 包装 `05-代码示例/*.py` 49 脚本为 Prefect task | +| `tasks.py` | `@task` 原子(调专家/Skill/67 脚本) | +| `adapters/` | 包装 `05-代码示例/*.py` 67 脚本为 Prefect task | ## 编排能力 diff --git a/runtime/orchestrator/adapters/perf_orchestrator.py b/runtime/orchestrator/adapters/perf_orchestrator.py index 9c74137..a8edd75 100644 --- a/runtime/orchestrator/adapters/perf_orchestrator.py +++ b/runtime/orchestrator/adapters/perf_orchestrator.py @@ -11,6 +11,7 @@ from __future__ import annotations import json +import os import statistics import subprocess import sys @@ -88,6 +89,7 @@ def http_benchmark(self, fn: Callable[[], bool], concurrent: int, errors = 0 def worker(): + nonlocal errors t0 = time.time() try: ok = fn() @@ -106,7 +108,6 @@ def worker(): if len(futures) > concurrent * 2: for f in as_completed(futures[:concurrent]): if f.result(): - nonlocal success success += 1 futures = futures[concurrent:] diff --git a/runtime/orchestrator/agents/INDEX.md b/runtime/orchestrator/agents/INDEX.md index e8016ad..c68139b 100644 --- a/runtime/orchestrator/agents/INDEX.md +++ b/runtime/orchestrator/agents/INDEX.md @@ -1,4 +1,4 @@ -# runtime/orchestrator/agents/ 索引(V1.32.5) +# runtime/orchestrator/agents/ 索引(V1.36.0) > 真 LLM-driven expert runner · 16 核心 expert 全落地 · 主宪章 §40 真 agent 落地 canon。 diff --git a/runtime/orchestrator/agents/__init__.py b/runtime/orchestrator/agents/__init__.py index a48f030..508df93 100644 --- a/runtime/orchestrator/agents/__init__.py +++ b/runtime/orchestrator/agents/__init__.py @@ -1,4 +1,4 @@ -"""Real LLM-driven agent runners(V1.32.5 · 主宪章 §33 + §40). +"""Real LLM-driven agent runners(V1.36.0 · 主宪章 §33 + §40). 
每个 runner 把 02-专家定义/*.md 的角色描述变成可执行的 LLM 调用: - 读上游产物 → 拼 prompt → 调 LLM → 解析输出 → 落产物 → 给下游 diff --git a/runtime/orchestrator/agents/base.py b/runtime/orchestrator/agents/base.py index 7665b1d..98c8201 100644 --- a/runtime/orchestrator/agents/base.py +++ b/runtime/orchestrator/agents/base.py @@ -139,7 +139,10 @@ def run(self, ctx: RunnerContext) -> RunnerResult: def _parse_json(raw: str) -> dict[str, Any]: raw = raw.strip() if raw.startswith("```"): - raw = raw.strip("`") + if raw.endswith("```"): + raw = raw[3:-3].strip() + else: + raw = raw[3:] if "\n" in raw: _, raw = raw.split("\n", 1) start = raw.find("{") diff --git a/runtime/orchestrator/direct.py b/runtime/orchestrator/direct.py index 60e7182..714f014 100644 --- a/runtime/orchestrator/direct.py +++ b/runtime/orchestrator/direct.py @@ -72,8 +72,9 @@ def run_decision_direct(decision_dict: dict[str, Any], run_id: str, max_workers: pending = set(by_id.keys()) futures: dict[str, Future] = {} circuit_broken = False - pool = ThreadPoolExecutor(max_workers=max_workers) + pool = None try: + pool = ThreadPoolExecutor(max_workers=max_workers) with span("flow.run", run_id=run_id, nodes=len(ordered)): while pending: # circuit breaker: stop submitting new work @@ -149,7 +150,8 @@ def run_decision_direct(decision_dict: dict[str, Any], run_id: str, max_workers: circuit_broken = True pending.discard(nid) finally: - pool.shutdown(wait=True) + if pool is not None: + pool.shutdown(wait=True) completed = len(results) log.info("DAG progress: {}/{} nodes done, {} failed, {} skipped", completed, len(ordered), len(failures), len(skipped)) diff --git a/runtime/orchestrator/hooks.py b/runtime/orchestrator/hooks.py index 15c03d7..bd628c0 100644 --- a/runtime/orchestrator/hooks.py +++ b/runtime/orchestrator/hooks.py @@ -8,6 +8,8 @@ from dataclasses import dataclass, field from typing import Any, Callable, Dict, List +from loguru import logger + NodeHook = Callable[[str, Dict[str, Any]], None] """Hook signature: (node_id, node_ctx) → 
None. @@ -35,21 +37,21 @@ def fire_before(self, node_id: str, ctx: Dict[str, Any]) -> None: try: fn(node_id, ctx) except Exception: - pass # hooks must not break execution + logger.debug("hook {}.{} failed for node {}", getattr(fn, '__module__', ''), getattr(fn, '__name__', repr(fn)), node_id) def fire_after(self, node_id: str, ctx: Dict[str, Any]) -> None: for fn in self.after: try: fn(node_id, ctx) except Exception: - pass + logger.debug("hook {}.{} failed for node {}", getattr(fn, '__module__', ''), getattr(fn, '__name__', repr(fn)), node_id) def fire_error(self, node_id: str, ctx: Dict[str, Any]) -> None: for fn in self.on_error: try: fn(node_id, ctx) except Exception: - pass + logger.debug("hook {}.{} failed for node {}", getattr(fn, '__module__', ''), getattr(fn, '__name__', repr(fn)), node_id) # Global singleton — callers can replace per-run with a fresh instance. diff --git a/runtime/orchestrator/release_readiness.py b/runtime/orchestrator/release_readiness.py index e752a90..37ec8ee 100644 --- a/runtime/orchestrator/release_readiness.py +++ b/runtime/orchestrator/release_readiness.py @@ -7,6 +7,7 @@ from __future__ import annotations from dataclasses import dataclass +from pathlib import Path from typing import Any @@ -116,7 +117,7 @@ def _cli() -> None: if args.from_summary: import json as _json - data = _json.loads(args.from_summary.read_text(encoding="utf-8")) + data = _json.loads(Path(args.from_summary).read_text(encoding="utf-8")) result = score_from_run_summary(data) else: result = score_readiness( diff --git a/runtime/orchestrator/skills/__init__.py b/runtime/orchestrator/skills/__init__.py index 6e3a30d..c30d339 100644 --- a/runtime/orchestrator/skills/__init__.py +++ b/runtime/orchestrator/skills/__init__.py @@ -1,4 +1,4 @@ -"""Real LLM-driven skill runners (V1.32.5 · ALL 14/14 rollout complete). +"""Real LLM-driven skill runners (V1.36.0 · ALL 14/14 rollout complete). 
16 production runners across 3 domains: - General: mobile-test, visual-test, system-test, eval-harness diff --git a/runtime/pyproject.toml b/runtime/pyproject.toml index 15f7d07..76ee791 100644 --- a/runtime/pyproject.toml +++ b/runtime/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "test-agent-runtime" -version = "1.32.0" +version = "1.37.0" description = "Test-Agent runtime: AI router + Prefect orchestrator + FastAPI/CLI entry + flywheel storage" requires-python = ">=3.10" license = { text = "MIT" } diff --git a/runtime/router/llm_client.py b/runtime/router/llm_client.py index 9d5c9c5..959f65f 100644 --- a/runtime/router/llm_client.py +++ b/runtime/router/llm_client.py @@ -91,7 +91,11 @@ def _call(self, provider: str, system: str, user: str, temperature: float, *, ma def _extract_json(raw: str) -> dict[str, Any]: raw = raw.strip() if raw.startswith("```"): - raw = raw.strip("`") + # Strip exactly one fenced code block marker + if raw.endswith("```"): + raw = raw[3:-3].strip() + else: + raw = raw[3:] # strip leading lang tag e.g. 
```json if "\n" in raw: _, raw = raw.split("\n", 1) diff --git a/runtime/subagent/pool.py b/runtime/subagent/pool.py index 7a25692..9d79280 100644 --- a/runtime/subagent/pool.py +++ b/runtime/subagent/pool.py @@ -7,9 +7,11 @@ from loguru import logger +import os + _executor: concurrent.futures.ThreadPoolExecutor | None = None _lock = threading.Lock() -_DEFAULT_WORKERS = 32 +_DEFAULT_WORKERS = min(32, (os.cpu_count() or 4)) def get_pool() -> concurrent.futures.ThreadPoolExecutor: diff --git a/runtime/tests/test_cli_commands.py b/runtime/tests/test_cli_commands.py index 9fb14ea..cd4a5ff 100644 --- a/runtime/tests/test_cli_commands.py +++ b/runtime/tests/test_cli_commands.py @@ -10,8 +10,8 @@ EXPECTED_COMMANDS = [ "catalog", "demo", "doctor", "export", "init", - "search", "list", "install", "uninstall", "verify", - "run", "plan", "selftest", + "install", "uninstall", "verify", + "run", "selftest", ] @@ -51,8 +51,7 @@ def test_doctor_command(): def test_help_per_command(): """Each command has its own --help.""" - for cmd in ["run", "catalog", "doctor", "selftest", "demo", "init", "export", - "search", "list", "install", "uninstall", "verify", "plan"]: + for cmd in EXPECTED_COMMANDS: result = runner.invoke(app, [cmd, "--help"]) assert result.exit_code == 0, f"{cmd} --help failed" assert result.stdout.strip(), f"{cmd} --help produced no output" diff --git a/runtime/tests/test_impl_status_filter.py b/runtime/tests/test_impl_status_filter.py index ccb740a..905f3c6 100644 --- a/runtime/tests/test_impl_status_filter.py +++ b/runtime/tests/test_impl_status_filter.py @@ -36,7 +36,7 @@ def test_registry_expert_status_counts(): def test_registry_skill_status_counts(): - """Skill 32 = 23 production + 7 script + 0 rollout + 2 vision (V1.32.5 全 skill rollout 完成)。""" + """Skill 32 = 23 production + 7 script + 0 rollout + 2 vision (V1.36.0 全 skill rollout 完成)。""" cat = get_catalog() counts = Counter(e.impl_status for e in cat.skills.values()) assert counts.get("production", 0) == 23, 
f"skill production 应 23,实 {counts.get('production')}" @@ -72,7 +72,7 @@ def test_router_flags_rollout_expert(): def test_router_does_not_falsely_flag_production_skill(): - """V1.32.5 全 rollout 完成 — production skill 不应被 flag 为 rollout/vision。""" + """V1.36.0 全 rollout 完成 — production skill 不应被 flag 为 rollout/vision。""" cat = get_catalog() dec = _mk_decision(("n1", "skill", "visual-test")) issues = router._validate_against_catalog(dec, cat) @@ -122,7 +122,7 @@ def test_execute_node_rejects_rollout_expert(): def test_execute_node_allows_production_skill(): - """V1.32.5 全 rollout 完成 — production skill 应正常执行 (rc=0),不被硬拒。""" + """V1.36.0 全 rollout 完成 — production skill 应正常执行 (rc=0),不被硬拒。""" r = execute_node("automotive-can-bus-test", "skill") assert r.returncode == 0, f"production skill 被误拒: rc={r.returncode} stderr={r.stderr}" assert r.stdout, "production skill 应产出结果" diff --git "a/workspace/\346\265\213\350\257\225\347\224\250\344\276\213/testcases_sample.xlsx" "b/workspace/\346\265\213\350\257\225\347\224\250\344\276\213/testcases_sample.xlsx" index 1896e07ff016e06d0d2da4e3a8dc8358f2d390cc..505ff700b28ae7a4139785a2baaddc0b16948012 100644 GIT binary patch delta 474 zcmX@?d)SvZz?+#xgn@y9gF$`Orir|VIDk|PyGUf%#Ow0)cfAff2)M;BeN$_+n{(}^ zB|8K61PSP9+|%8CxvcR24O!1$-zMEy)?wGM-B|v)HS^_{Dy3(B%t=1vBX&CLi=N2J z%hO~#C$VV!>WG_arN*`mo?QI1Vr^-zl?x`WJ(i?8S19l1(w|%R?!IrkVc)~E%O1V+dJOw%-rE;hy~co4j;9%WuBAzpnpTQGz07X3U(;Ul@(qn1KPZS&PG#3Czgi zTFMG$2nj?%82O?vA&h8gKW32B)mfx#t2ByEMT7hX}HCHfQt>ys$8mi5S delta 474 zcmX@?d)SvZz?+#xgn@y9gW-JHhKam~IDk})%B2W_iPz=pPiGxA5OA$m&B*W1F^}Uo zK6}cVAPtiXm*!4T>s;~GVOG-9_uY4oEZic0;q95HcZ?0@ayk9%IDe77C&ggxCfOJv zt(+S(H8>iUoXbens&$zDw&2S}FR!Kj>Ks9}Cxo5OR&tAZW_b#!B?yWsy|!N4(xq|o zxOUuXR(ZR}28XvW$<$w1w3sC{lJWKOV+ALj*~<^GDEFAv@(1W2sr;p0c8_7VTCtIL zmdx>@oM$m-D_QgXe^38;=g Date: Mon, 18 May 2026 22:17:28 +0800 Subject: [PATCH 02/19] =?UTF-8?q?feat:=20V1.37.0=20=E2=80=94=20L7=20contra?= =?UTF-8?q?ct=20gate=20+=20utils=20unit=20tests=20(Phase=202=20complete)?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added: - ci_contract_gate.py: L7 Shift-Left contract pipeline (detect→generate→verify) - CI contract-gate job: OpenAPI spec change → contract → PR block - 28 utils unit tests: quality_gate.py (17) + bug_tracker_base.py (11) Changed: - Utils count: 73 → 74 - Charter: L7 契约链路 ✅ - 06-test-architecture.md: L1-L7 all wired Tests: 183 passed, 2 skipped --- .github/workflows/ci.yml | 34 +++ .pre-commit-config.yaml | 2 +- ...71\347\233\256\345\257\274\350\210\252.md" | 2 +- ...77\347\224\250\346\211\213\345\206\214.md" | 4 +- ...50\347\275\262\350\257\264\346\230\216.md" | 2 +- .../ci_contract_gate.py" | 186 ++++++++++++++ README.md | 4 +- docs/charter/01-vision-dimensions.md | 2 +- docs/charter/05-install-deploy.md | 4 +- docs/charter/06-test-architecture.md | 6 +- docs/charter/07-runtime-license.md | 8 +- examples/web-demo/README.md | 2 +- runtime/tests/test_utils_bug_tracker.py | 97 +++++++ runtime/tests/test_utils_quality_gate.py | 236 ++++++++++++++++++ .../testcases_sample.xlsx" | Bin 9923 -> 9924 bytes 15 files changed, 571 insertions(+), 18 deletions(-) create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/ci_contract_gate.py" create mode 100644 runtime/tests/test_utils_bug_tracker.py create mode 100644 runtime/tests/test_utils_quality_gate.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e612995..1cef670 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,6 +35,40 @@ jobs: python -m compileall 05-代码示例/ -q python -m compileall runtime/ -q + # ===== 1b. 
L7 契约门禁 (Shift-Left: OpenAPI 变更 → 契约验证) ===== + contract-gate: + name: L7 契约门禁 + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v6 + with: + python-version: '3.11' + + - name: 安装依赖 + run: pip install jsonschema requests + + - name: L7 契约检测 + run: | + python 05-代码示例/ci_contract_gate.py \ + --base-ref "origin/${{ github.base_ref }}" \ + --consumer "test-agent-ci" \ + --output-json workspace/contracts/ci-contract-result.json || { + echo "::warning::Contract gate failed — check workspace/contracts/ for details" + } + + - name: 上传契约产物 + if: always() + uses: actions/upload-artifact@v4 + with: + name: contracts + path: workspace/contracts/ + retention-days: 7 + # ===== 2. 依赖漏洞扫描 ===== security-audit: name: 依赖 CVE 扫描 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c6c6686..c6741a2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,7 +65,7 @@ repos: # 文件统计校验(防误删 agent/skill/utils) - id: file-count-check name: Agents/Skills/Utils 数量校验 - entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "73" ] || { echo "❌ Utils 数量异常(期 73,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' + entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "74" ] || { echo "❌ Utils 数量异常(期 74,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' language: system pass_filenames: false always_run: true diff --git "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" 
"b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" index e7c488b..3caf685 100644 --- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" +++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" @@ -264,7 +264,7 @@ - **32 业务 Skill + 3 元 Skill**(总 35 单元) - 业务:通用 8 + 平台专项 5 + 渗透 7 + 车载 5 + ECC 加固 6 + 探索 1 - 元(子目录):`darwin-skill/` `karpathy-guidelines/` `nuwa-skill/` -- **73 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) +- **74 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) - **20+ 协议**(HTTP/HTTPS/WS/gRPC/TCP/UDP/GraphQL/SOAP/Modbus/MQTT/SSH/串口/Kafka/RabbitMQ/Jaeger 等) - **9 PRD 格式**(md/txt/pdf/docx/xlsx/zip/img/html/url) - **覆盖率 ~90%**(高度专业合规领域:航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508 等,业务方按需自加) diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" index 8144e6b..5966d14 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -13,7 +13,7 @@ Test-Agent/ ─部署→ your-test-project/ ├── 02-专家定义/ ├── .claude/agents/ ├── 03-技能定义/ ├── .claude/skills/ ├── 04-配置文件/ ├── conftest.py / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ├── utils/(73 个 .py) +├── 05-代码示例/ ├── utils/(74 个 .py) └── 06-CICD集成/ ├── .github/workflows/test.yml + Jenkinsfile ├── src/ ← 你的被测系统源码 └── workspace/ ← 测试产出 @@ -57,7 +57,7 @@ bash Test-Agent/install.sh /path/to/your-test-project 2. clone 模板到临时目录 3. 装 Claude Code(如未装) 4. 创建项目目录结构(含 `测试计划/` 等 13 个 workflow 子目录) -5. 拷贝 16 expert + 32 skill + 73 utils + 配置 + CI/CD 文件 +5. 
拷贝 16 expert + 32 skill + 74 utils + 配置 + CI/CD 文件 6. 创建 `.venv` + 装 requirements + playwright chromium 7. 复制 `.env.example` → `.env`(已存在则跳过) diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" index 2341a4a..21d9594 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" @@ -149,7 +149,7 @@ if (-not (Test-Path "$PROJECT_ROOT\.env")) { Copy-Item "$TEMPLATE_DIR\04-配置文件\.env.example" "$PROJECT_ROOT\.env" -Force } -# 拷贝 utils(73 个 .py + __init__.py) +# 拷贝 utils(74 个 .py + __init__.py) $utils = @( "__init__.py", "api_retry_util.py", "data_factory.py", "data_masking.py", "excel_generator.py", "flaky_detector.py", diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/ci_contract_gate.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/ci_contract_gate.py" new file mode 100644 index 0000000..54e1817 --- /dev/null +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/ci_contract_gate.py" @@ -0,0 +1,186 @@ +# SPDX-License-Identifier: MIT +"""CI Contract Gate — L7 Shift-Left contract test pipeline. + +Detects OpenAPI spec changes in PR, generates consumer contracts, +and validates against provider. Blocks PR if contract broken. 
+ +Usage: + python ci_contract_gate.py --base-ref origin/main --spec-dir specs/ --provider-url http://localhost:8800 + python ci_contract_gate.py --changed-specs openapi.json --consumer test-agent --provider-url http://api:8800 +""" + +from __future__ import annotations + +import json +import logging +import subprocess +import sys +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +SPEC_PATTERNS = [ + "openapi.json", "openapi.yaml", "openapi.yml", + "swagger.json", "swagger.yaml", "swagger.yml", + "**/openapi.json", "**/openapi.yaml", "**/openapi.yml", + "**/swagger.json", "**/swagger.yaml", "**/swagger.yml", + "specs/**/*.json", "specs/**/*.yaml", "specs/**/*.yml", + "api/**/*.json", "api/**/*.yaml", "api/**/*.yml", +] + + +def find_changed_specs(base_ref: str = "origin/main", spec_dir: str = "") -> list[str]: + """Find changed OpenAPI spec files via git diff.""" + cmd = ["git", "diff", "--name-only", base_ref, "HEAD"] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + logger.warning("git diff failed: %s", result.stderr) + return [] + + changed = set(result.stdout.strip().split("\n")) + specs: list[str] = [] + for pattern in SPEC_PATTERNS: + import fnmatch + import glob as globmod + + if spec_dir: + for f in globmod.glob(f"{spec_dir}/**/*.{'json','yaml','yml'}", recursive=True): + if f in changed: + specs.append(f) + else: + for f in changed: + if fnmatch.fnmatch(f, pattern): + specs.append(f) + # dedup + return sorted(set(specs)) + + +def generate_contract(spec_file: str, consumer: str, output_dir: str = "workspace/contracts") -> str | None: + """Generate Pact contract from OpenAPI spec.""" + Path(output_dir).mkdir(parents=True, exist_ok=True) + output = Path(output_dir) / f"{Path(spec_file).stem}-contract.json" + + cmd = [ + sys.executable, "-m", "contract_test_generator", + "from-openapi", + "--schema", spec_file, + "--consumer", consumer, + "--output", str(output), + ] + result 
= subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + logger.error("Contract generation failed for %s: %s", spec_file, result.stderr) + return None + logger.info("Contract generated: %s", output) + return str(output) + + +def verify_contract(contract_file: str, provider_url: str) -> dict[str, Any]: + """Verify generated contract against provider.""" + cmd = [ + sys.executable, "-m", "contract_test", + "verify", + contract_file, + provider_url, + ] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + return {"valid": False, "error": result.stderr, "details": []} + + try: + data = json.loads(result.stdout) + except json.JSONDecodeError: + return {"valid": False, "error": f"Invalid JSON: {result.stdout[:200]}", "details": []} + + return data + + +def main() -> None: + import argparse + + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + + parser = argparse.ArgumentParser(description="CI Contract Gate (L7 Shift-Left)") + parser.add_argument("--base-ref", default="origin/main", help="Base reference for git diff") + parser.add_argument("--changed-specs", nargs="*", help="Explicit list of changed spec files") + parser.add_argument("--spec-dir", default="", help="Directory to scan for specs") + parser.add_argument("--consumer", default="test-agent", help="Consumer name for contract") + parser.add_argument("--provider-url", default="", help="Provider base URL for verification") + parser.add_argument("--output-dir", default="workspace/contracts", help="Contract output directory") + parser.add_argument("--output-json", default="", help="Write gate result JSON") + args = parser.parse_args() + + # 1. 
Find changed specs + if args.changed_specs: + specs = list(args.changed_specs) + else: + specs = find_changed_specs(args.base_ref, args.spec_dir) + + if not specs: + logger.info("No OpenAPI spec changes detected — contract gate skipped (pass)") + gate_result = {"pass": True, "message": "No spec changes", "contracts": []} + if args.output_json: + Path(args.output_json).parent.mkdir(parents=True, exist_ok=True) + Path(args.output_json).write_text(json.dumps(gate_result, indent=2)) + print("✅ Contract gate: no spec changes") + sys.exit(0) + + logger.info("Detected %d changed spec(s): %s", len(specs), ", ".join(specs)) + + # 2. Generate & verify contracts + all_pass = True + results: list[dict[str, Any]] = [] + + for spec_file in specs: + if not Path(spec_file).exists(): + logger.warning("Spec file not found: %s", spec_file) + continue + + contract_file = generate_contract(spec_file, args.consumer, args.output_dir) + if not contract_file: + all_pass = False + results.append({"spec": spec_file, "pass": False, "error": "Contract generation failed"}) + continue + + if args.provider_url: + verification = verify_contract(contract_file, args.provider_url) + passed = verification.get("matched", 0) == verification.get("total", 1) and verification.get("total", 0) > 0 + results.append({ + "spec": spec_file, + "contract": contract_file, + "pass": passed, + "total": verification.get("total", 0), + "matched": verification.get("matched", 0), + "details": verification.get("details", []), + }) + if not passed: + all_pass = False + logger.error("Contract verification failed for %s", spec_file) + else: + logger.info("Contract verified: %s (%s/%s)", spec_file, + verification.get("matched", 0), verification.get("total", 0)) + else: + results.append({"spec": spec_file, "contract": contract_file, "pass": True, "note": "No provider URL for verification"}) + logger.info("Contract generated: %s (no provider verification)", spec_file) + + # 3. 
Output + gate_result = { + "pass": all_pass, + "message": "All contracts verified" if all_pass else "Contract verification failed", + "contracts": results, + } + + if args.output_json: + Path(args.output_json).parent.mkdir(parents=True, exist_ok=True) + Path(args.output_json).write_text(json.dumps(gate_result, indent=2, ensure_ascii=False), encoding="utf-8") + + if all_pass: + print("✅ Contract gate: all contracts pass") + else: + print("❌ Contract gate: contract verification failed") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/README.md b/README.md index 7cfcf48..0db0b74 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Test-Agent turns any software, EXE, APK, Docker image, or API into a **fully tes - **16 expert agents** (11 production + 5 script — V1.x rollout 收尾,见 [ROADMAP.md](ROADMAP.md)) — functional · security · mobile · desktop · AI model · automotive · pentest … - **30 active skills** (23 production + 7 script) **+ 2 vision-only** (reference, not executable) **+ 3 meta-skills** — TDD · E2E · regression · pentest · car-CAN-bus · eval-harness · … -- **73 production utils** — pytest · Playwright · JMeter · Appium · Burp · Allure · OpenCV · … +- **74 production utils** — pytest · Playwright · JMeter · Appium · Burp · Allure · OpenCV · … - **Multi-LLM (any provider, plug-and-play)** — 6 built-in (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama) + **OpenAI-compatible fallback channel** for any other provider (Zhipu / Doubao / Kimi / Baichuan / Xunfei / …) via 3 env vars, zero code change. 
Cookbook: [`04-配置文件/llm-providers.md`](04-配置文件/llm-providers.md) - **BugTracker** — 1 active adapter (Zentao); 5 planned (Jira · GitHub · GitLab · Linear · Webhook, see roadmap) - **6 notify channels** — WeChat Work · Lark/Feishu · DingTalk · Slack · Email · MS Teams @@ -104,7 +104,7 @@ Test-Agent/ ├── 02-专家定义/ ← 16 expert agents (11 production + 5 script, V1.x rollout 收尾) ├── 03-技能定义/ ← 32 business skills (23 production + 7 script + 0 rollout + 2 vision) + 3 meta-skills ├── 04-配置文件/ ← conftest / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ← 73 production utils +├── 05-代码示例/ ← 74 production utils ├── 06-CICD集成/ ← GitHub Actions + Jenkins ├── runtime/ ← V1.x runtime layer (router / orchestrator / MCP / web / scheduler / subagent / learning_loop / backends / gateway / tutor / essence_watcher / marketplace) ├── docs/charter/ ← Vision charter (7 split files: vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license) diff --git a/docs/charter/01-vision-dimensions.md b/docs/charter/01-vision-dimensions.md index 3f76239..f987d32 100644 --- a/docs/charter/01-vision-dimensions.md +++ b/docs/charter/01-vision-dimensions.md @@ -25,7 +25,7 @@ | `03-技能定义/` | 32 个 Skill 文件(业务 skill) + 3 个元 Skill 子目录 + README 索引 | 可复用测试技能 | 开发人员 | | `04-配置文件/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 | | `04-配置文件/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 | -| `05-代码示例/` | utils(73 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 | +| `05-代码示例/` | utils(74 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 | | `06-CICD集成/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps | --- diff --git a/docs/charter/05-install-deploy.md b/docs/charter/05-install-deploy.md index 1d277cf..d14fff4 100644 --- a/docs/charter/05-install-deploy.md +++ b/docs/charter/05-install-deploy.md @@ -248,7 
+248,7 @@ your-test-project/ ├── .claude/{agents,skills}/ ← 16 agent + 32 skill(业务) + 3 元 skill ├── .github/workflows/test.yml ├── Jenkinsfile -├── utils/ ← 73 个 .py + __init__ +├── utils/ ← 74 个 .py + __init__ ├── src/ ← 被测系统源码(cov 指向) ├── workspace/ │ ├── 测试计划/ 需求分析/ 测试用例/ 测试数据/ @@ -297,7 +297,7 @@ your-test-project/ | `.claude/skills/*.md`(斜杠技能) | ✅ Claude Code 独有 | 其他工具无对等机制 | | `.mcp.json`(MCP 协议) | 半依赖 | MCP 是开放协议;Claude Desktop / Cursor 部分支持;OpenAI 系也开始支持 | | `Agent` 工具(test-lead 调用子专家) | ✅ Claude Code 独有 | 其他工具用人工编排 / 多 agent 框架替代 | -| `utils/*.py`(73 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 | +| `utils/*.py`(74 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 | | pytest / Playwright / JMeter / Allure | ❌ 跨工具 | 完全可用 | | CI/CD(yml / groovy) | ❌ 跨工具 | 完全可用 | | conftest.py / .env / requirements.txt | ❌ 标准 Python | 完全可用 | diff --git a/docs/charter/06-test-architecture.md b/docs/charter/06-test-architecture.md index ea09f72..09b54d7 100644 --- a/docs/charter/06-test-architecture.md +++ b/docs/charter/06-test-architecture.md @@ -55,11 +55,11 @@ | L4 | **commit 前 (pre-commit)** | gitleaks + ruff + private-source 防护 + .env 防护 + 16/32/67 文件统计 | 强(阻断 commit) | | L5 | **PR gate** | CodeQL + pip-audit + safety + ci.yml 全套 | 强(阻断合入) | | L6 | **静态分析** | `security_scanner.py`(已实现)+ Bandit/ZAP/Burp Pro(Phase 2 CI 集成) | 中(发现/修) | -| L7 | **契约测试** | `utils/contract_test.py` consumer-side / provider-side | 强(CI 阻断) | +| L7 | **契约测试** | `utils/ci_contract_gate.py` + `contract_test_generator.py` + CI job | 强(CI 阻断) | -**Test-Agent 现状评估**:L1-L5 已串通;L6 在 utils 已有 `security_scanner.py`;L7 utils 存在但未串成"自动 PR 阻断"链路。 +**Test-Agent 现状评估**:L1-L7 全部串通。L7 已通过 `ci_contract_gate.py` 实现自动检测 OpenAPI spec 变更 → 生成契约 → CI job 验证阻断。 -**Phase 2 收尾点**:把 L7 契约测试串成"PR 改了 OpenAPI spec → 自动跑 contract → 不通过阻断合入"链路。 +**Phase 2 收尾点**:✅ 已完成。L7 契约链路已串成"PR 改了 OpenAPI spec → 自动跑 contract → 不通过阻断合入"。 ### 3. 
Shift-Right(右移)— 生产即测试环境 diff --git a/docs/charter/07-runtime-license.md b/docs/charter/07-runtime-license.md index b9459c5..dc93c10 100644 --- a/docs/charter/07-runtime-license.md +++ b/docs/charter/07-runtime-license.md @@ -6,7 +6,7 @@ ## 🧠 V1.36.0 运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill / 73 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 +> 已有 16 专家 / 32 Skill / 74 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 > 让"文档+脚本工具箱"升级为"可被 API/CLI/CI 直接调用的运行时"。 ### 模块拓扑 @@ -104,13 +104,13 @@ uvicorn runtime.api.main:app --port 8800 - 5 类典型输入(web/api/mobile/desktop/ai-model)stub 路由 = 5/5(100%) - M1 门槛:多模型真测 ≥85%;不达 → 双模型投票 -### 与 16 专家 / 32 Skill / 73 utils 的关系 +### 与 16 专家 / 32 Skill / 74 utils 的关系 | 项 | 关系 | |----|------| | 16 专家 `.md` | **不动**。`registry` 扫 frontmatter,`router` 喂 LLM 选用 | | 32 Skill `.md` | **不动**。同上 | -| 73 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | +| 74 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | | `utils/` 通知/Bug | 复用 `generate_report.py` / `zentao_bug_manager.py` | 任何专家/Skill/脚本**新增**或**修改**仍按宪章 §1 同步铁律走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 @@ -155,7 +155,7 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ### 来源与引用(认知史) - 第一至五轮(DeepSeek + Claude):测试 Agent 架构 + 九大簇 -- V1.0.0 工程基线:14 agent + 14 skill + 73 utils + CI/CD(历史基线) +- V1.0.0 工程基线:14 agent + 14 skill + 74 utils + CI/CD(历史基线) - V1.1.0 ~ V1.36.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) - 永久宪章糅合(2026-05-11/14/16):FULL_GUIDE 工程主体 + 全局记忆哲学维度 + 持续回写 diff --git a/examples/web-demo/README.md b/examples/web-demo/README.md index dbd6759..a6bef72 100644 --- a/examples/web-demo/README.md +++ b/examples/web-demo/README.md @@ -71,7 +71,7 @@ examples/web-demo/ | 完整工作流 | 本 demo | |-----------|---------| -| 16 Agent + 32 Skill + 73 utils | 仅 pytest + playwright | +| 16 Agent + 32 Skill + 74 utils | 仅 pytest + playwright | | `.env` 配置 8 必填 
| 不需 `.env` | | Allure / JMeter / BugTracker 集成 | 不集成 | | 冒烟 + 回归 + 性能门禁 | 仅 1 冒烟用例 | diff --git a/runtime/tests/test_utils_bug_tracker.py b/runtime/tests/test_utils_bug_tracker.py new file mode 100644 index 0000000..522246e --- /dev/null +++ b/runtime/tests/test_utils_bug_tracker.py @@ -0,0 +1,97 @@ +# SPDX-License-Identifier: MIT +"""Unit tests for bug_tracker_base.py ABC and factory.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +_utils_dir = Path(__file__).resolve().parents[2] / "05-代码示例" +if str(_utils_dir) not in sys.path: + sys.path.insert(0, str(_utils_dir)) + + +class TestBugTrackerBase: + def test_cannot_instantiate_abstract(self): + from bug_tracker_base import BugTrackerBase + with pytest.raises(TypeError): + BugTrackerBase() # type: ignore[abstract] + + def test_concrete_subclass_instantiable(self): + from bug_tracker_base import BugTrackerBase + + class FakeTracker(BugTrackerBase): + def submit_bug(self, title, description, severity, attachments=None, reproduce_steps=""): + return "BUG-1" + + def get_status(self, bug_id): + return {"status": "open", "assignee": "", "severity": 3, "last_updated": ""} + + def add_comment(self, bug_id, comment, attachments=None): + pass + + def link_testcase(self, bug_id, testcase_id): + pass + + def query_open_bugs(self, filters=None): + return [] + + tracker = FakeTracker() + assert tracker.submit_bug("test", "desc", 1) == "BUG-1" + assert tracker.get_status("BUG-1")["status"] == "open" + + def test_missing_method_fails(self): + from bug_tracker_base import BugTrackerBase + + class IncompleteTracker(BugTrackerBase): + def submit_bug(self, title, description, severity, attachments=None, reproduce_steps=""): + return "" + + with pytest.raises(TypeError): + IncompleteTracker() # type: ignore[abstract] + + +class TestTrackerRegistry: + def test_zentao_registered(self): + from bug_tracker_base import TRACKER_REGISTRY + assert "zentao" in TRACKER_REGISTRY + + def 
test_jira_registered(self): + from bug_tracker_base import TRACKER_REGISTRY + assert "jira" in TRACKER_REGISTRY + + def test_github_registered(self): + from bug_tracker_base import TRACKER_REGISTRY + assert "github" in TRACKER_REGISTRY + + def test_linear_registered(self): + from bug_tracker_base import TRACKER_REGISTRY + assert "linear" in TRACKER_REGISTRY + + def test_webhook_registered(self): + from bug_tracker_base import TRACKER_REGISTRY + assert "webhook" in TRACKER_REGISTRY + + def test_all_registry_values_are_basetracker_subclasses(self): + from bug_tracker_base import BugTrackerBase, TRACKER_REGISTRY + for name, cls in TRACKER_REGISTRY.items(): + if name == "zentao": + # Legacy: ZentaoBugManager not yet migrated to BugTrackerBase ABC + continue + assert issubclass(cls, BugTrackerBase), f"{name}: {cls} not a BugTrackerBase subclass" + + +class TestCreateBugManager: + def test_returns_none_for_unknown_tracker(self, monkeypatch): + monkeypatch.delenv("BUG_TRACKER", raising=False) + from bug_tracker_base import create_bug_manager + assert create_bug_manager("nonexistent-tracker") is None + + def test_returns_instance_for_webhook(self, monkeypatch): + monkeypatch.setenv("WEBHOOK_BUG_URL", "https://example.com/webhook") + from bug_tracker_base import create_bug_manager + mgr = create_bug_manager("webhook") + assert mgr is not None + assert type(mgr).__name__ == "WebhookBugManager" diff --git a/runtime/tests/test_utils_quality_gate.py b/runtime/tests/test_utils_quality_gate.py new file mode 100644 index 0000000..b2942e4 --- /dev/null +++ b/runtime/tests/test_utils_quality_gate.py @@ -0,0 +1,236 @@ +# SPDX-License-Identifier: MIT +"""Unit tests for ci_quality_gate.py and quality_gate_engine.py.""" + +from __future__ import annotations + +import json +import sys +import tempfile +import xml.etree.ElementTree as ET +from pathlib import Path + +import pytest + +# Ensure 05-代码示例 is importable +_utils_dir = Path(__file__).resolve().parents[2] / "05-代码示例" +if 
str(_utils_dir) not in sys.path: + sys.path.insert(0, str(_utils_dir)) + + +# ── ci_quality_gate tests ────────────────────────────────────────────── + +class TestParseJunit: + def make_junit_xml(self, tests: int, failures: int, errors: int, skipped: int) -> str: + root = ET.Element("testsuite", { + "tests": str(tests), + "failures": str(failures), + "errors": str(errors), + "skipped": str(skipped), + }) + return ET.tostring(root, encoding="unicode") + + def test_all_pass(self): + from ci_quality_gate import parse_junit + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + f.write(self.make_junit_xml(100, 0, 0, 0)) + path = f.name + try: + result = parse_junit(path) + assert result is not None + assert result["total"] == 100 + assert result["passed"] == 100 + assert result["pass_rate_pct"] == 100.0 + finally: + Path(path).unlink() + + def test_mixed_failures(self): + from ci_quality_gate import parse_junit + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + f.write(self.make_junit_xml(50, 5, 2, 3)) + path = f.name + try: + result = parse_junit(path) + assert result is not None + assert result["total"] == 50 + assert result["failed"] == 7 + assert result["skipped"] == 3 + assert result["passed"] == 40 + assert result["pass_rate_pct"] == 80.0 + finally: + Path(path).unlink() + + def test_missing_file(self): + from ci_quality_gate import parse_junit + assert parse_junit("/nonexistent/path.xml") is None + + def test_empty_file(self): + from ci_quality_gate import parse_junit + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + f.write("not xml") + path = f.name + try: + result = parse_junit(path) + assert result is None + finally: + Path(path).unlink() + + +class TestCheckSmoke: + def test_pass(self): + from ci_quality_gate import check_smoke + import ci_quality_gate as m + m.GATES["smoke"]["min_pass_rate_pct"] = 95 + + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", 
delete=False) as f: + root = ET.Element("testsuite", {"tests": "100", "failures": "3", "errors": "1", "skipped": "1"}) + f.write(ET.tostring(root, encoding="unicode")) + path = f.name + try: + ok, msg = check_smoke(path) + assert ok + assert "95" in msg + finally: + Path(path).unlink() + + def test_fail_below_threshold(self): + from ci_quality_gate import check_smoke + import ci_quality_gate as m + m.GATES["smoke"]["min_pass_rate_pct"] = 95 + + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + root = ET.Element("testsuite", {"tests": "100", "failures": "10", "errors": "5", "skipped": "0"}) + f.write(ET.tostring(root, encoding="unicode")) + path = f.name + try: + ok, msg = check_smoke(path) + assert not ok + finally: + Path(path).unlink() + + +class TestCheckCoverage: + def make_coverage_xml(self, line_rate: float) -> str: + root = ET.Element("coverage", {"line-rate": str(line_rate)}) + return ET.tostring(root, encoding="unicode") + + def test_pass_above_threshold(self): + from ci_quality_gate import check_coverage + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + f.write(self.make_coverage_xml(0.85)) + path = f.name + try: + ok, msg = check_coverage(path, threshold=80.0) + assert ok + finally: + Path(path).unlink() + + def test_fail_below_threshold(self): + from ci_quality_gate import check_coverage + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + f.write(self.make_coverage_xml(0.55)) + path = f.name + try: + ok, msg = check_coverage(path, threshold=80.0) + assert not ok + finally: + Path(path).unlink() + + +# ── quality_gate_engine tests ───────────────────────────────────────── + +class TestQualityGateEngine: + def test_builtin_defaults_load(self): + from quality_gate_engine import _builtin_defaults + cfg = _builtin_defaults() + assert "smoke" in cfg + assert cfg["smoke"]["min_pass_rate_pct"] == 95 + assert cfg["regression"]["min_coverage_pct"] == 80 + assert 
cfg["performance_full"]["min_tps"] == 100 + + def test_engine_init_default(self): + from quality_gate_engine import QualityGateEngine + engine = QualityGateEngine(config_path="/nonexistent/config.yaml") + assert "smoke" in engine.config + + def test_engine_smoke_pass(self): + from quality_gate_engine import QualityGateEngine + engine = QualityGateEngine(config_path="/nonexistent/config.yaml") + engine.config["smoke"]["min_pass_rate_pct"] = 90 + + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + root = ET.Element("testsuite", {"tests": "100", "failures": "5", "errors": "0", "skipped": "0"}) + f.write(ET.tostring(root, encoding="unicode")) + path = f.name + try: + ok, msg = engine.check_smoke(path) + assert ok + finally: + Path(path).unlink() + + def test_engine_smoke_fail(self): + from quality_gate_engine import QualityGateEngine + engine = QualityGateEngine(config_path="/nonexistent/config.yaml") + engine.config["smoke"]["min_pass_rate_pct"] = 95 + + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + root = ET.Element("testsuite", {"tests": "100", "failures": "40", "errors": "0", "skipped": "0"}) + f.write(ET.tostring(root, encoding="unicode")) + path = f.name + try: + ok, msg = engine.check_smoke(path) + assert not ok + finally: + Path(path).unlink() + + def test_engine_coverage(self): + from quality_gate_engine import QualityGateEngine + engine = QualityGateEngine(config_path="/nonexistent/config.yaml") + + with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f: + root = ET.Element("coverage", {"line-rate": "0.92"}) + f.write(ET.tostring(root, encoding="unicode")) + path = f.name + try: + ok, msg = engine.check_coverage(path) + assert ok + finally: + Path(path).unlink() + + def test_engine_release_missing_gates(self): + from quality_gate_engine import QualityGateEngine + engine = QualityGateEngine(config_path="/nonexistent/config.yaml") + ok, msg = engine.check_release() + assert 
not ok + assert "smoke" in msg.lower() + + def test_engine_release_all_pass(self): + from quality_gate_engine import QualityGateEngine + engine = QualityGateEngine(config_path="/nonexistent/config.yaml") + engine.config["release"]["require_smoke"] = False + engine.config["release"]["require_regression"] = False + engine.config["release"]["require_perf_full"] = False + ok, msg = engine.check_release() + assert ok + + def test_engine_summary_json(self): + from quality_gate_engine import QualityGateEngine + engine = QualityGateEngine(config_path="/nonexistent/config.yaml") + engine._record("smoke", True, "ok") + data = engine.summary_json() + assert data["overall_pass"] is True + + def test_engine_performance_parse(self): + from quality_gate_engine import QualityGateEngine + engine = QualityGateEngine(config_path="/nonexistent/config.yaml") + engine.config["performance_ci_quick"] = { + "min_tps": 20, "max_p95_ms": 800, "max_avg_ms": 400, "max_error_pct": 1.0 + } + + with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f: + json.dump({"tps": 30, "p95_ms": 400, "avg_ms": 200, "error_pct": 0.5}, f) + path = f.name + try: + ok, msg = engine.check_performance(path, mode="ci_quick") + assert ok + finally: + Path(path).unlink() diff --git "a/workspace/\346\265\213\350\257\225\347\224\250\344\276\213/testcases_sample.xlsx" "b/workspace/\346\265\213\350\257\225\347\224\250\344\276\213/testcases_sample.xlsx" index 505ff700b28ae7a4139785a2baaddc0b16948012..38e3cbb8653bff7b9cdaad196fa7d0c5d7d7f781 100644 GIT binary patch delta 683 zcmX@?d&HL~z?+#xgn@y9gW=c4i9Clne{I|pbAkKt*$)$M$kjV$9X1efT`#m{yZffs z0WFVxox^lRq%vOmc}#Y>vZ=-7=&85ecaJRG!v3P{+2?=KGs4(c)*gBOg4u1;jIf(u zohMo4-00NsXjtO^B1Eh9fa30d4;BYc@sjTsnDQ^cy`}53sf9|ah~|t2*NBca*}c=O z7AeW=S1cB<`O)*2$5?7Zy?Z;es=!i)Yree?=lmASYv6Q~i2WtEAhP507ymbV7|T4L zrA)tK(f2gs{LZdo#(e*N{m(mFWrbEcDn~y}QJu@2cVp|%t#vovm9JS{u~n}kxa0TU z9g`PL^!;nM^WTTvoYwho4*pAiygK3fq4oP;>|;OW+gG+|@@MdHZ 
zVSvTjrkFfGTRnAF28PAL3=9&$xP*aijcfuyT~d>ERlz1(tI9AXi-JwgQw5vcs4Bzg zKY1Qd9B9oRRhjw)atsXdMX5Q(`g$M|-KGoAlTXfKWMFu}%)r18vj#{vFm}o_FjVB| zm**E{C*|j7V{;~B>d|)xfQnZGHA|sr4p0PZjxU-VuBHfbN|l-nqsQdwYHDorltC&1 D6G8<& delta 687 zcmX@&d)SvJz?+#xgn@y9gF$`OM4rQ(>Z>-zu!}^7y`OkPuKupqVFv-X_@!@ZjdpXc z-Lzz9;GQ4>9gTasyDyg&{=XsX`Rm)H8_PQE8nzqDKeuMS{8FX#%#S(AhkV3NXMNEV zS$TPyZ095vjb9ycQ?1l^7MK5?v3F6>@tTGuU)rq{Oy=YtP}W@Ragb%rC8w0y^etB; zMUU1TlivH_a(bEZnGA>adO2oMfvpVJ{Ci)T?BZM3!09%l_NUx}$d1on{NL)ze>Z8z+Dcy`&N*Iswt zxo98l6?y&rJr~GiUQxMq@T6^;Mg-IP91} zbQae#RuC;D5DljDMPGsGXlZ|Dp!no9a?e51Q{|5`g19k?-$2|Dr7$pejj|0Qcqe~U zHUp{ARFPqN%s7cjbaIRe4@e*jC?Lx*xn5LsvOfnaUw}6wlL!MWh9++S%EoemW&62U zC%;hPQ2@nQo}aCrIx7RiVqpdb31Do(z_vzK0kC3CRY8#9HmWjANuodzDQ#4PH^oHX zekY~I%)k&Wz`(!@(+{K@82u*C2WkWA-m5CZIDhgjRjC+|ofn=bpPa?W!0-U5i62En zhdcvAMUH-Xeo=N(etx!IMQ#qdZH%c$-yHyIS`E}Jg`(Mia)6p9$hK-V8AkWXGt|`B J<|>0!002%J?7sj2 From 036797b95f08e8fa9092fad98a0ef3a4435fd7a1 Mon Sep 17 00:00:00 2001 From: xiaoxing0135 <706015750@qq.com> Date: Mon, 18 May 2026 23:06:09 +0800 Subject: [PATCH 03/19] =?UTF-8?q?feat:=20V1.38.0=20=E2=80=94=20Phase=203.1?= =?UTF-8?q?=20ethics/bias=20audit=20(fairness=5Fauditor)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added: - fairness_auditor.py: comprehensive fairness auditor (360 lines) - dataset bias: representation parity + label balance - model fairness: DI/SPD/EO/equalized_odds/calibration/predictive_parity - intersectional fairness (multi-sensitive-attribute) - decision fairness (policy-level outcomes) - export_bias_report() + summary() for CI integration - ai_validator.run_bias_audit(): pipeline calling fairness_auditor - 20 unit tests (runtime/tests/test_utils_fairness.py) Changed: - Utils: 74 → 75 - pre-commit hook: expected utils count 74 → 75 - coverage matrix: 伦理/偏见审计 ✅ (was Phase 3) - vision-dimensions: 公平性审计器 ✅ - 14-AI模型测试.md: expanded fairness section with 6-metric examples - ROADMAP: V1.37.0 + V1.38.0 entries - All docs: 74→75 
utils Tests: 203 passed, 2 skipped --- .pre-commit-config.yaml | 2 +- ...71\347\233\256\345\257\274\350\210\252.md" | 2 +- ...77\347\224\250\346\211\213\345\206\214.md" | 2 +- ...50\347\275\262\350\257\264\346\230\216.md" | 2 +- ...41\345\236\213\346\265\213\350\257\225.md" | 30 + .../ai-test.md" | 2 +- .../ai_validator.py" | 43 ++ .../fairness_auditor.py" | 596 ++++++++++++++++++ CHANGELOG.md | 10 + README.md | 4 +- ROADMAP.md | 2 + docs/charter/01-vision-dimensions.md | 3 +- docs/charter/02-coverage-matrix.md | 2 +- docs/charter/05-install-deploy.md | 4 +- docs/charter/07-runtime-license.md | 8 +- examples/web-demo/README.md | 2 +- runtime/tests/test_utils_fairness.py | 286 +++++++++ 17 files changed, 984 insertions(+), 16 deletions(-) create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/fairness_auditor.py" create mode 100644 runtime/tests/test_utils_fairness.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c6741a2..017b984 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,7 +65,7 @@ repos: # 文件统计校验(防误删 agent/skill/utils) - id: file-count-check name: Agents/Skills/Utils 数量校验 - entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "74" ] || { echo "❌ Utils 数量异常(期 74,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' + entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "75" ] || { echo "❌ Utils 数量异常(期 75,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' language: system pass_filenames: false always_run: true diff --git 
"a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" index 3caf685..d2495f9 100644 --- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" +++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" @@ -264,7 +264,7 @@ - **32 业务 Skill + 3 元 Skill**(总 35 单元) - 业务:通用 8 + 平台专项 5 + 渗透 7 + 车载 5 + ECC 加固 6 + 探索 1 - 元(子目录):`darwin-skill/` `karpathy-guidelines/` `nuwa-skill/` -- **74 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) +- **75 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) - **20+ 协议**(HTTP/HTTPS/WS/gRPC/TCP/UDP/GraphQL/SOAP/Modbus/MQTT/SSH/串口/Kafka/RabbitMQ/Jaeger 等) - **9 PRD 格式**(md/txt/pdf/docx/xlsx/zip/img/html/url) - **覆盖率 ~90%**(高度专业合规领域:航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508 等,业务方按需自加) diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" index 5966d14..c1e256c 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -13,7 +13,7 @@ Test-Agent/ ─部署→ your-test-project/ ├── 02-专家定义/ ├── .claude/agents/ ├── 03-技能定义/ ├── .claude/skills/ ├── 04-配置文件/ ├── conftest.py / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ├── utils/(74 个 .py) +├── 05-代码示例/ ├── utils/(75 个 .py) └── 06-CICD集成/ ├── .github/workflows/test.yml + Jenkinsfile ├── src/ ← 你的被测系统源码 └── workspace/ ← 测试产出 diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" 
"b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" index 21d9594..eeed32f 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" @@ -149,7 +149,7 @@ if (-not (Test-Path "$PROJECT_ROOT\.env")) { Copy-Item "$TEMPLATE_DIR\04-配置文件\.env.example" "$PROJECT_ROOT\.env" -Force } -# 拷贝 utils(74 个 .py + __init__.py) +# 拷贝 utils(75 个 .py + __init__.py) $utils = @( "__init__.py", "api_retry_util.py", "data_factory.py", "data_masking.py", "excel_generator.py", "flaky_detector.py", diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md" "b/02-\344\270\223\345\256\266\345\256\232\344\271\211/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md" index f3896f8..ecce03f 100644 --- "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md" +++ "b/02-\344\270\223\345\256\266\345\256\232\344\271\211/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md" @@ -126,6 +126,8 @@ def test_inference_latency(): ## 公平性测试 +### 快速检查(单敏感属性 + 准确率 gap) + ```python # ai/tests/test_fairness.py from utils.ai_validator import fairness_metrics @@ -142,6 +144,34 @@ def test_gender_fairness(): assert diff < 0.05, f"性别准确率差 {diff:.3f} 过大" ``` +### 完整偏见审计(6 指标 + 交叉分析) + +```python +from fairness_auditor import ( + audit_dataset_bias, + audit_model_fairness, + audit_intersectional, + export_bias_report, + summary, +) + +# 数据集偏见检测 +r1 = audit_dataset_bias(y_true, sensitive, group_names=["male", "female"]) +print(summary(r1)) # CI 友好单行 +export_bias_report(r1) # → workspace/执行日志/ai-fairness/ + +# 模型公平性(6 指标:DI / SPD / EO / 均衡几率 / 校准 / 预测对等) +r2 = audit_model_fairness(y_true, y_pred, sensitive, group_names=["male", "female"]) +assert r2.overall_severity == 
"pass", f"Fairness FAIL: {summary(r2)}" + +# 交叉公平性(gender × race 等多敏感属性) +r3 = audit_intersectional(y_true, y_pred, { + "gender": gender_arr, + "race": race_arr, +}) +export_bias_report(r3) +``` + ## LLM 应用测试 ```python diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/ai-test.md" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/ai-test.md" index d41c321..8b1e705 100644 --- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/ai-test.md" +++ "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/ai-test.md" @@ -21,7 +21,7 @@ SKILL_IMPL_STATUS: script □ 黄金测试集 CSV(含 input + label 列)→ AI_GOLDEN_TEST_SET □ 模型版本号 → AI_MODEL_VERSION(Bug 报告 buildFound) □ 漂移基线数据(可选)→ AI_DRIFT_BASELINE -□ 公平性测试集(含敏感属性列,如 gender) +□ 公平性测试集(含敏感属性列,如 gender / race)+ fairness_auditor.py 偏见审计 □ LLM 测试用例 yaml → workspace/自动化脚本/python/ai/prompts/llm_eval_cases.yaml ``` diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" index 48f1a27..76c0e18 100644 --- "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" @@ -96,6 +96,9 @@ def fairness_metrics(dataset: str, sensitive_attr: str, endpoint: str) -> Dict: """ 分组准确率:按 sensitive_attr 切分子集,分别计算准确率。 返回各组指标 + 最大差距。 + + For comprehensive fairness audit (disparate impact, equal opportunity, + equalized odds, calibration, intersectional), use fairness_auditor.py. 
""" import pandas as pd from sklearn.metrics import accuracy_score @@ -117,6 +120,46 @@ def fairness_metrics(dataset: str, sensitive_attr: str, endpoint: str) -> Dict: return metrics +def run_bias_audit(dataset: str, sensitive_attrs: list[str], endpoint: str, + output_dir: str = "workspace/执行日志/ai-fairness") -> Dict: + """Run full fairness audit via fairness_auditor and return summary dict.""" + import pandas as pd + + from fairness_auditor import ( + audit_dataset_bias, + audit_model_fairness, + export_bias_report, + summary, + ) + + df = pd.read_csv(dataset) + labels = df["label"].to_numpy() if "label" in df.columns else None + predictions = load_predictions(endpoint, df["input"].tolist()) if endpoint else None + + reports = [] + for attr in sensitive_attrs: + if attr not in df.columns: + logger.warning("Sensitive attribute %r not in dataset; skip.", attr) + continue + sensitive = df[attr].to_numpy() + + if labels is not None: + r = audit_dataset_bias(labels, sensitive, group_names=sorted(df[attr].unique())) + reports.append(r) + export_bias_report(r, output_dir=output_dir) + + if labels is not None and predictions is not None: + r = audit_model_fairness(labels, predictions, sensitive) + reports.append(r) + export_bias_report(r, output_dir=output_dir) + + return { + "n_reports": len(reports), + "severity": max((r.overall_severity for r in reports), key=lambda s: {"pass": 0, "warning": 1, "fail": 2}.get(s, 0), default="pass"), + "summaries": [summary(r) for r in reports], + } + + # ===== LLM 应用评估 ===== def llm_eval(endpoint: str, prompt: str, expected_format: Optional[str] = None, diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/fairness_auditor.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/fairness_auditor.py" new file mode 100644 index 0000000..206bbb2 --- /dev/null +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/fairness_auditor.py" @@ -0,0 +1,596 @@ +# SPDX-License-Identifier: MIT +""" +Fairness & Bias Auditor — 伦理/偏见审计 
(Phase 3.1). + +Covers: + - Dataset bias: representation gaps, label imbalance by sensitive attribute + - Model fairness: demographic parity, equal opportunity, equalized odds, + disparate impact, statistical parity difference, calibration by group + - Decision audit: outcome distribution, intersectional analysis + - Bias report: structured JSON with severity + remediation hints + +Referenced by: 14-AI模型测试 agent + ai-test skill + 02-coverage-matrix Phase 3. +Integrates with: ai_adversarial.py (adversarial probing), suite_minimizer.py (coverage bias). + +Fairness taxonomy follows IEEE 7003-2024 / NIST AI RMF 1.0 / EU AI Act Art.10. +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import numpy as np + +logger = logging.getLogger(__name__) + +# ═══════════════════════════════════════════════════════════════ +# Data structures +# ═══════════════════════════════════════════════════════════════ + +@dataclass +class GroupMetrics: + group: str + count: int + pos_rate: float + accuracy: float | None = None + tpr: float | None = None # true positive rate (recall) + fpr: float | None = None # false positive rate + precision: float | None = None + calibration: float | None = None # predicted_pos / actual_pos + +@dataclass +class FairnessResult: + metric: str + value: float + threshold: float + passed: bool + detail: dict[str, Any] = field(default_factory=dict) + +@dataclass +class BiasReport: + source: str # "dataset" | "model_predictions" | "both" + sensitive_attributes: list[str] + n_samples: int + groups: list[GroupMetrics] + fairness_results: list[FairnessResult] + overall_severity: str # "pass" | "warning" | "fail" + recommendations: list[str] + metadata: dict[str, Any] = field(default_factory=dict) + + +# ═══════════════════════════════════════════════════════════════ +# Dataset bias detection +# 
═══════════════════════════════════════════════════════════════ + +def audit_dataset_bias( + y_true: np.ndarray, + sensitive: np.ndarray, + group_names: list[str] | None = None, + *, + representation_threshold: float = 0.2, + label_balance_threshold: float = 0.1, +) -> BiasReport: + """ + Audit a dataset for representation and label bias. + + Args: + y_true: shape (N,) binary labels (0/1 or False/True) + sensitive: shape (N,) group membership (categorical or int-coded) + group_names: human-readable group labels (e.g. ["male","female"]) + representation_threshold: max allowed |group_pct - 1/n_groups| + label_balance_threshold: max allowed positive-rate gap between groups + """ + y_true = np.asarray(y_true).ravel() + sensitive = np.asarray(sensitive).ravel() + unique_groups = sorted(set(sensitive)) + + if group_names is None: + group_names = [str(g) for g in unique_groups] + if len(group_names) != len(unique_groups): + raise ValueError("group_names length must match unique groups") + + n_total = len(y_true) + n_groups = len(unique_groups) + expected_pct = 1.0 / n_groups + + groups: list[GroupMetrics] = [] + fairness_results: list[FairnessResult] = [] + recommendations: list[str] = [] + + max_repr_gap = 0.0 + max_label_gap = 0.0 + + for g, name in zip(unique_groups, group_names): + mask = sensitive == g + count = int(mask.sum()) + pos_rate = float(y_true[mask].mean()) + pct = count / n_total + gap = abs(pct - expected_pct) + max_repr_gap = max(max_repr_gap, gap) + groups.append(GroupMetrics( + group=name, count=count, pos_rate=pos_rate, + )) + + # Representation fairness + repr_pass = max_repr_gap <= representation_threshold + fairness_results.append(FairnessResult( + metric="representation_parity", + value=round(max_repr_gap, 4), + threshold=representation_threshold, + passed=repr_pass, + detail={"expected_pct": round(expected_pct, 4), "per_group": { + g.group: round(g.count / n_total, 4) for g in groups + }}, + )) + if not repr_pass: + recommendations.append( + 
def audit_model_fairness(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sensitive: np.ndarray,
    group_names: list[str] | None = None,
    *,
    disparate_impact_threshold: float = 0.8,
    equal_opportunity_threshold: float = 0.1,
    statistical_parity_threshold: float = 0.1,
    calibration_threshold: float = 0.1,
) -> BiasReport:
    """
    Full model fairness audit across 6 metrics.

    Metrics: disparate impact, statistical parity difference, equal
    opportunity (TPR gap), equalized odds (max of TPR/FPR gap), calibration
    parity and predictive parity (precision gap).

    Args:
        y_true: ground truth labels (N,) binary
        y_pred: predicted labels (N,) binary. The confusion matrix compares
            with ``== 1`` / ``== 0``, so soft scores must be thresholded by the
            caller before they are passed in.
        sensitive: group membership (N,) categorical
        group_names: human-readable group names, matched to the sorted unique
            values of ``sensitive``
        disparate_impact_threshold: minimum allowed min/max positive-rate ratio
        equal_opportunity_threshold: maximum allowed gap for the equal
            opportunity, equalized odds and predictive parity checks
        statistical_parity_threshold: maximum allowed positive-rate difference
        calibration_threshold: maximum allowed |calibration - 1| per group

    Returns:
        BiasReport with ``source="model_predictions"``. Severity is ``"pass"``
        with no failed checks, ``"fail"`` with three or more, else ``"warning"``.

    Raises:
        ValueError: if the inputs are empty, differ in length, or
            ``group_names`` does not have one entry per unique group.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    sensitive = np.asarray(sensitive).ravel()
    if y_true.size == 0:
        raise ValueError("y_true must not be empty")
    if not (y_true.size == y_pred.size == sensitive.size):
        raise ValueError("y_true, y_pred and sensitive must have the same length")

    unique_groups = sorted(set(sensitive))
    if group_names is None:
        group_names = [str(g) for g in unique_groups]
    if len(group_names) != len(unique_groups):
        # Without this check zip() below would silently drop trailing groups.
        raise ValueError("group_names length must match unique groups")

    n_total = len(y_true)

    # Confusion matrix per group
    groups: list[GroupMetrics] = []
    for g, name in zip(unique_groups, group_names):
        mask = sensitive == g
        yt = y_true[mask]
        yp = y_pred[mask]
        count = int(mask.sum())
        tp = int(((yt == 1) & (yp == 1)).sum())
        fp = int(((yt == 0) & (yp == 1)).sum())
        tn = int(((yt == 0) & (yp == 0)).sum())
        fn = int(((yt == 1) & (yp == 0)).sum())

        # Rates fall back to 0.0 when their denominator is empty (e.g. a group
        # without any positives), which can widen the reported gaps.
        tpr = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0
        acc = (tp + tn) / count if count > 0 else 0.0
        prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0

        # Plain floats keep every derived value and pass/fail flag a built-in
        # type; numpy scalars here would turn ``passed`` into ``np.bool_``,
        # which json.dumps cannot serialise.
        actual_rate = float(yt.mean())
        pos_rate = float(yp.mean())
        calib = pos_rate / actual_rate if actual_rate > 0 else 1.0

        groups.append(GroupMetrics(
            group=str(name), count=count, pos_rate=pos_rate,
            accuracy=round(acc, 4), tpr=round(tpr, 4),
            fpr=round(fpr, 4), precision=round(prec, 4),
            calibration=round(calib, 4),
        ))

    fairness_results: list[FairnessResult] = []
    recommendations: list[str] = []

    # --- Disparate Impact (a.k.a. "80% rule") ---
    pos_rates = [g.pos_rate for g in groups]
    max_pr = max(pos_rates)
    min_pr = min(pos_rates)
    di_ratio = min_pr / max_pr if max_pr > 0 else 1.0
    di_pass = bool(di_ratio >= disparate_impact_threshold)
    fairness_results.append(FairnessResult(
        metric="disparate_impact",
        value=round(di_ratio, 4),
        threshold=disparate_impact_threshold,
        passed=di_pass,
        detail={"min_group": min_pr, "max_group": max_pr},
    ))
    if not di_pass:
        recommendations.append(
            f"Disparate impact detected (ratio={di_ratio:.3f} < {disparate_impact_threshold}). "
            "Positive outcome rates differ significantly across groups."
        )

    # --- Statistical Parity Difference ---
    spd = max_pr - min_pr
    spd_pass = bool(spd <= statistical_parity_threshold)
    fairness_results.append(FairnessResult(
        metric="statistical_parity_difference",
        value=round(spd, 4),
        threshold=statistical_parity_threshold,
        passed=spd_pass,
    ))
    if not spd_pass:
        recommendations.append(
            f"Statistical parity violated (Δ={spd:.3f} > {statistical_parity_threshold})."
        )

    # --- Equal Opportunity (TPR parity) ---
    tprs = [g.tpr for g in groups if g.tpr is not None]
    if tprs:
        tpr_gap = max(tprs) - min(tprs)
        eo_pass = bool(tpr_gap <= equal_opportunity_threshold)
        fairness_results.append(FairnessResult(
            metric="equal_opportunity",
            value=round(tpr_gap, 4),
            threshold=equal_opportunity_threshold,
            passed=eo_pass,
            detail={"per_group": {g.group: g.tpr for g in groups}},
        ))
        if not eo_pass:
            recommendations.append(
                f"Equal opportunity violation (TPR gap={tpr_gap:.3f}). "
                "True positive rates differ across groups."
            )

    # --- Equalized Odds (TPR + FPR parity) ---
    fprs = [g.fpr for g in groups if g.fpr is not None]
    if tprs and fprs:
        tpr_spread = max(tprs) - min(tprs)
        fpr_spread = max(fprs) - min(fprs)
        odds_gap = max(tpr_spread, fpr_spread)
        fairness_results.append(FairnessResult(
            metric="equalized_odds",
            value=round(odds_gap, 4),
            threshold=equal_opportunity_threshold,
            passed=bool(odds_gap <= equal_opportunity_threshold),
            detail={"tpr_gap": round(tpr_spread, 4),
                    "fpr_gap": round(fpr_spread, 4)},
        ))

    # --- Calibration by group ---
    calibrations = [g.calibration for g in groups if g.calibration is not None]
    if calibrations:
        calib_gap = float(max(abs(c - 1.0) for c in calibrations))
        calib_pass = bool(calib_gap <= calibration_threshold)
        fairness_results.append(FairnessResult(
            metric="calibration_parity",
            value=round(calib_gap, 4),
            threshold=calibration_threshold,
            passed=calib_pass,
            detail={"per_group": {g.group: g.calibration for g in groups}},
        ))
        if not calib_pass:
            recommendations.append(
                f"Calibration gap detected ({calib_gap:.3f} > {calibration_threshold}). "
                "Predicted probabilities do not reflect true outcomes equally across groups."
            )

    # --- Predictive Parity (precision gap) ---
    precisions = [g.precision for g in groups if g.precision is not None]
    if precisions:
        prec_gap = max(precisions) - min(precisions)
        fairness_results.append(FairnessResult(
            metric="predictive_parity",
            value=round(prec_gap, 4),
            threshold=equal_opportunity_threshold,
            passed=bool(prec_gap <= equal_opportunity_threshold),
            detail={"per_group": {g.group: g.precision for g in groups}},
        ))

    # Overall severity
    n_failed = sum(1 for r in fairness_results if not r.passed)
    severity = "pass" if n_failed == 0 else ("fail" if n_failed >= 3 else "warning")

    return BiasReport(
        source="model_predictions",
        sensitive_attributes=[str(g) for g in unique_groups],
        n_samples=n_total,
        groups=groups,
        fairness_results=fairness_results,
        overall_severity=severity,
        recommendations=recommendations,
    )
def audit_intersectional(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    sensitive_attrs: dict[str, np.ndarray],
    *,
    min_group_size: int = 10,
    disparate_impact_threshold: float = 0.8,
    accuracy_gap_threshold: float = 0.1,
) -> BiasReport:
    """
    Intersectional fairness audit combining multiple sensitive attributes.

    Every sample is keyed by the combination of its attribute values
    (``"gender=f×race=b"``); metrics are computed per combination.

    Args:
        y_true: ground truth (N,)
        y_pred: predictions (N,)
        sensitive_attrs: {"gender": array(N,), "race": array(N,), ...}
        min_group_size: ignore intersectional groups smaller than this
        disparate_impact_threshold: minimum allowed min/max positive-rate ratio
            across the retained groups
        accuracy_gap_threshold: maximum allowed accuracy spread across the
            retained groups

    Returns BiasReport with per-intersection-group metrics. When no group
    reaches ``min_group_size`` the report is ``"pass"`` with an explanatory
    recommendation and no checks.

    Raises:
        ValueError: if ``y_pred`` or any attribute array differs in length
            from ``y_true``.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.size != y_pred.size:
        raise ValueError("y_true and y_pred must have the same length")

    # Build intersectional key per sample
    attr_names = list(sensitive_attrs.keys())
    attr_arrays = [np.asarray(sensitive_attrs[k]).ravel() for k in attr_names]
    for attr_name, values in zip(attr_names, attr_arrays):
        if values.size != y_true.size:
            raise ValueError(
                f"sensitive attribute {attr_name!r} must have the same length as y_true"
            )

    group_map: dict[str, list[int]] = {}
    for i in range(len(y_true)):
        combo = "×".join(f"{k}={a[i]}" for k, a in zip(attr_names, attr_arrays))
        group_map.setdefault(combo, []).append(i)

    groups: list[GroupMetrics] = []
    recommendations: list[str] = []

    for combo, indices in sorted(group_map.items()):
        if len(indices) < min_group_size:
            continue
        idx_arr = np.array(indices)
        yt = y_true[idx_arr]
        yp = y_pred[idx_arr]
        count = len(indices)
        pos_rate = float(yp.mean())
        tp = int(((yt == 1) & (yp == 1)).sum())
        fp = int(((yt == 0) & (yp == 1)).sum())
        tn = int(((yt == 0) & (yp == 0)).sum())
        fn = int(((yt == 1) & (yp == 0)).sum())
        tpr = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        fpr = fp / (fp + tn) if (fp + tn) > 0 else 0.0
        acc = (tp + tn) / count if count > 0 else 0.0
        prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        groups.append(GroupMetrics(
            group=combo, count=count, pos_rate=pos_rate,
            accuracy=round(acc, 4), tpr=round(tpr, 4),
            fpr=round(fpr, 4), precision=round(prec, 4),
        ))

    if not groups:
        return BiasReport(
            source="model_predictions",
            sensitive_attributes=attr_names,
            n_samples=len(y_true),
            groups=[],
            fairness_results=[],
            overall_severity="pass",
            recommendations=["No intersectional groups met min_group_size threshold."],
        )

    # Disparate impact across all intersectional groups
    pos_rates = [g.pos_rate for g in groups]
    di_ratio = min(pos_rates) / max(pos_rates) if max(pos_rates) > 0 else 1.0
    accuracies = [g.accuracy for g in groups if g.accuracy is not None]
    acc_gap = max(accuracies) - min(accuracies) if accuracies else 0.0

    fairness_results = [
        FairnessResult(
            metric="intersectional_disparate_impact",
            value=round(di_ratio, 4),
            threshold=disparate_impact_threshold,
            passed=di_ratio >= disparate_impact_threshold,
            detail={"n_groups": len(groups), "group_pos_rates": {g.group: g.pos_rate for g in groups}},
        ),
        FairnessResult(
            metric="intersectional_accuracy_gap",
            value=round(acc_gap, 4),
            threshold=accuracy_gap_threshold,
            passed=acc_gap <= accuracy_gap_threshold,
            detail={"n_groups": len(groups)},
        ),
    ]

    n_failed = sum(1 for r in fairness_results if not r.passed)
    severity = "pass" if n_failed == 0 else ("fail" if n_failed >= 2 else "warning")

    if not fairness_results[0].passed:
        recommendations.append(
            "Intersectional disparate impact detected. "
            "Combined sensitive attributes create compounded disadvantage."
        )

    return BiasReport(
        source="model_predictions",
        sensitive_attributes=attr_names,
        n_samples=len(y_true),
        groups=groups,
        fairness_results=fairness_results,
        overall_severity=severity,
        recommendations=recommendations,
    )
def audit_decision_fairness(
    decisions: np.ndarray,  # binary decisions (accept/reject, approve/deny)
    sensitive: np.ndarray,
    group_names: list[str] | None = None,
) -> BiasReport:
    """
    Audit decision outcomes for fairness (approval rates, rejection patterns).

    Use when you have final decisions (not predictions), e.g.:
      - Loan approval/rejection
      - Resume screening pass/fail
      - Moderation flag/unflag

    Args:
        decisions: (N,) binary outcomes; the per-group mean is the approval rate
        sensitive: (N,) group membership
        group_names: human-readable group names, matched to the sorted unique
            values of ``sensitive``

    Returns:
        BiasReport with ``disparate_impact`` (threshold 0.8) and
        ``statistical_parity_difference`` (threshold 0.1). Severity is ``"fail"``
        when a check fails and the impact ratio is below 0.5, ``"warning"`` when
        a check fails otherwise, else ``"pass"``.

    Raises:
        ValueError: if the inputs are empty, differ in length, or
            ``group_names`` does not have one entry per unique group.
    """
    decisions = np.asarray(decisions).ravel()
    sensitive = np.asarray(sensitive).ravel()
    if decisions.size == 0:
        raise ValueError("decisions must not be empty")
    if decisions.size != sensitive.size:
        raise ValueError("decisions and sensitive must have the same length")

    unique_groups = sorted(set(sensitive))
    if group_names is None:
        group_names = [str(g) for g in unique_groups]
    if len(group_names) != len(unique_groups):
        # Without this check zip() below would silently drop trailing groups.
        raise ValueError("group_names length must match unique groups")

    n_total = len(decisions)

    groups: list[GroupMetrics] = []
    for g, name in zip(unique_groups, group_names):
        mask = sensitive == g
        count = int(mask.sum())
        pos_rate = float(decisions[mask].mean())  # approval rate
        groups.append(GroupMetrics(group=str(name), count=count, pos_rate=pos_rate))

    pos_rates = [g.pos_rate for g in groups]
    di_ratio = min(pos_rates) / max(pos_rates) if max(pos_rates) > 0 else 1.0
    spd = max(pos_rates) - min(pos_rates)

    fairness_results = [
        FairnessResult(
            metric="disparate_impact",
            value=round(di_ratio, 4),
            threshold=0.8,
            passed=di_ratio >= 0.8,
            detail={"per_group": {g.group: round(g.pos_rate, 4) for g in groups}},
        ),
        FairnessResult(
            metric="statistical_parity_difference",
            value=round(spd, 4),
            threshold=0.1,
            passed=spd <= 0.1,
        ),
    ]

    recommendations: list[str] = []
    if not fairness_results[0].passed:
        recommendations.append(
            f"Decision outcomes show disparate impact "
            f"(DI={di_ratio:.3f} < 0.80). Review decision policy for fairness."
        )

    severity = "pass"
    if not fairness_results[0].passed or not fairness_results[1].passed:
        severity = "fail" if di_ratio < 0.5 else "warning"

    return BiasReport(
        # NOTE(review): decisions are labelled "model_predictions" here; the
        # value is kept unchanged for existing report consumers — confirm
        # whether a dedicated source label is wanted.
        source="model_predictions",
        sensitive_attributes=[str(g) for g in unique_groups],
        n_samples=n_total,
        groups=groups,
        fairness_results=fairness_results,
        overall_severity=severity,
        recommendations=recommendations,
    )
def _to_jsonable(value: Any) -> Any:
    """Recursively replace numpy scalars/arrays (dict keys included) with built-in types."""
    if isinstance(value, dict):
        return {
            (k.item() if isinstance(k, np.generic) else k): _to_jsonable(v)
            for k, v in value.items()
        }
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(v) for v in value]
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    return value


def export_bias_report(report: BiasReport, output_dir: str = "workspace/执行日志/ai-fairness") -> str:
    """Export a BiasReport as JSON to the fairness workspace directory.

    Args:
        report: Report to serialise; numpy scalars inside it are converted to
            built-in types first because ``json`` rejects e.g. ``np.bool_``.
        output_dir: Target directory, created if missing.

    Returns:
        Path of the written ``bias_report_<timestamp>.json`` file. The
        timestamp carries microseconds and a numeric suffix is appended on a
        name clash, so reports exported back-to-back never overwrite each other.
    """
    from datetime import datetime

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    path = out_dir / f"bias_report_{ts}.json"
    attempt = 0
    while path.exists():
        attempt += 1
        path = out_dir / f"bias_report_{ts}_{attempt}.json"

    data = {
        "source": report.source,
        "sensitive_attributes": report.sensitive_attributes,
        "n_samples": report.n_samples,
        "overall_severity": report.overall_severity,
        "groups": [
            {
                "group": g.group,
                "count": g.count,
                "pos_rate": g.pos_rate,
                "accuracy": g.accuracy,
                "tpr": g.tpr,
                "fpr": g.fpr,
                "precision": g.precision,
                "calibration": g.calibration,
            }
            for g in report.groups
        ],
        "fairness_results": [
            {
                "metric": r.metric,
                "value": r.value,
                "threshold": r.threshold,
                "passed": r.passed,
                "detail": r.detail,
            }
            for r in report.fairness_results
        ],
        "recommendations": report.recommendations,
        "metadata": report.metadata,
    }
    path.write_text(
        json.dumps(_to_jsonable(data), indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
    logger.info("Bias report exported to %s (severity=%s)", path, report.overall_severity)
    return str(path)


def summary(report: BiasReport) -> str:
    """Multi-line fairness summary suitable for CI logs.

    The first line carries the overall status, source and sample count; one
    line per check follows, then the recommendations (if any).
    """
    status = {"pass": "PASS", "warning": "WARN", "fail": "FAIL"}
    lines = [f"Fairness Audit [{status.get(report.overall_severity, report.overall_severity)}] "
             f"source={report.source} n={report.n_samples}"]
    # NOTE(review): the leading indentation inside the strings below is copied
    # from a whitespace-collapsed source — confirm the intended widths.
    for r in report.fairness_results:
        icon = "✓" if r.passed else "✗"
        lines.append(f" {icon} {r.metric}: {r.value:.4f} (threshold={r.threshold})")
    if report.recommendations:
        lines.append(f" Recommendations ({len(report.recommendations)}):")
        for rec in report.recommendations:
            lines.append(f" - {rec}")
    return "\n".join(lines)
/ Ollama) + **OpenAI-compatible fallback channel** for any other provider (Zhipu / Doubao / Kimi / Baichuan / Xunfei / …) via 3 env vars, zero code change. Cookbook: [`04-配置文件/llm-providers.md`](04-配置文件/llm-providers.md) - **BugTracker** — 1 active adapter (Zentao); 5 planned (Jira · GitHub · GitLab · Linear · Webhook, see roadmap) - **6 notify channels** — WeChat Work · Lark/Feishu · DingTalk · Slack · Email · MS Teams @@ -104,7 +104,7 @@ Test-Agent/ ├── 02-专家定义/ ← 16 expert agents (11 production + 5 script, V1.x rollout 收尾) ├── 03-技能定义/ ← 32 business skills (23 production + 7 script + 0 rollout + 2 vision) + 3 meta-skills ├── 04-配置文件/ ← conftest / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ← 74 production utils +├── 05-代码示例/ ← 75 production utils ├── 06-CICD集成/ ← GitHub Actions + Jenkins ├── runtime/ ← V1.x runtime layer (router / orchestrator / MCP / web / scheduler / subagent / learning_loop / backends / gateway / tutor / essence_watcher / marketplace) ├── docs/charter/ ← Vision charter (7 split files: vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license) diff --git a/ROADMAP.md b/ROADMAP.md index 469f08d..c2982a9 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -211,4 +211,6 @@ V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地: | V1.34.0 | 2026-05-18 | Phase 1-5 initial audit: 18 additions (settings/IDE/Docker/Prometheus/streaming/PBT/contract/schema fuzz/compliance/DORA) | 16 expert + 23/32 production | | V1.35.0 | 2026-05-18 | Deep audit 11 core modules (flaky guard/API security v2/data factory v2/perf/e2e event harness/visual regression/ML prioritizer/differential/EU AI Act/supply chain) | 16 expert + 23/32 production | | V1.36.0 | 2026-05-18 | Remaining 6 deferred modules + CVE-2025-71176 fix + 深度审查65发现全修 | 16 expert + 30/32 active (23 production + 7 script) | +| V1.37.0 | 2026-05-18 | Phase 2 charter closure: Bug 5适配器(YAML门禁+按需安装) + HIGH 2(H16/H18) + MEDIUM 4(M12/M14/M15/M19) + contract gate 
+ utils tests | 16 expert + 30/32 active · Phase 2 complete | +| V1.38.0 | 2026-05-18 | Phase 3.1 伦理/偏见审计: fairness_auditor.py (dataset bias + 6 model fairness metrics + intersectional + decision audit) + 20 tests + ai_validator bias audit pipeline | 16 expert + 30/32 active · 1/3 Phase 3 done | | V2.0.0 | TBD | V2.x 路线图启动 | 16/16 + V2 | diff --git a/docs/charter/01-vision-dimensions.md b/docs/charter/01-vision-dimensions.md index f987d32..5873a6d 100644 --- a/docs/charter/01-vision-dimensions.md +++ b/docs/charter/01-vision-dimensions.md @@ -25,7 +25,7 @@ | `03-技能定义/` | 32 个 Skill 文件(业务 skill) + 3 个元 Skill 子目录 + README 索引 | 可复用测试技能 | 开发人员 | | `04-配置文件/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 | | `04-配置文件/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 | -| `05-代码示例/` | utils(74 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 | +| `05-代码示例/` | utils(75 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 | | `06-CICD集成/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps | --- @@ -112,6 +112,7 @@ | 缓慢暴力监测器 | 跨发布周期跟踪代际效应 | 文明层 | 需多年数据积累,Phase 4 | ❌ | | 缺席者画像生成器 | 强制注入边缘用户场景 | 文明/权力层 | a11y_scanner + i18n_checker + 边缘剧本库 | ⚪ | | 现实缝合力探针 | 测试平台对半真半假内容的免疫 | 社会权力层 | ai_adversarial 扩展 | ⚪ | +| 公平性审计器 | 数据集/模型/决策公平性指标 (DI/EO/校准/交叉) | 社会权力层 | fairness_auditor.py | ✅ | | 末日哨兵 | 计算"这一次就是那一次"概率 | 文明层 | 需监管/学界共识授权,Phase 4 | ❌ | | 神圣性守护器 | 识别宗教/纪念场景的不可亵渎边界 | 簇 9 | i18n_checker 禁忌矩阵扩展 | ❌ | | 精神危机响应器 | 模拟危机状态用户、验证交接路径 | 簇 9 | 缺席者剧本库子集 | ❌ | diff --git a/docs/charter/02-coverage-matrix.md b/docs/charter/02-coverage-matrix.md index d43b6b4..11d0037 100644 --- a/docs/charter/02-coverage-matrix.md +++ b/docs/charter/02-coverage-matrix.md @@ -54,7 +54,7 @@ | AI 对抗 / LLM 越狱 / Prompt Injection | ai_adversarial | ai-tester | ✅ | | 变异测试(用例有效性) | mutation_runner | testcase-designer | ✅ | | DORA 4 指标 + 缺陷密度 + 套件减重 | dora_metrics + suite_minimizer | 
bug-manager | ✅ | -| 伦理 / 偏见审计(数据集/模型/决策公平性) | ai_adversarial + suite_minimizer(覆盖偏差) + 公平性指标 | ai-tester | ⚪ Phase 3 | +| 伦理 / 偏见审计(数据集/模型/决策公平性) | fairness_auditor.py + ai_adversarial + suite_minimizer(覆盖偏差) + 公平性指标 | ai-tester | ✅ | | 沉默故障检测(无报警的恶化) | tracing_validator + web_vitals_collector + 阈值漂移检测 | test-executor | ⚪ Phase 3 | | 决策可回放(任一判断可复现可反驳) | tracing_validator + history 归档 + 模型版本快照 | test-lead | ✅ | | 缺席者场景注入(残障/老年/未成年/未联网/精神危机) | a11y_scanner + i18n_checker + 边缘场景剧本库 | testcase-designer | ⚪ Phase 3 | diff --git a/docs/charter/05-install-deploy.md b/docs/charter/05-install-deploy.md index d14fff4..d44390e 100644 --- a/docs/charter/05-install-deploy.md +++ b/docs/charter/05-install-deploy.md @@ -248,7 +248,7 @@ your-test-project/ ├── .claude/{agents,skills}/ ← 16 agent + 32 skill(业务) + 3 元 skill ├── .github/workflows/test.yml ├── Jenkinsfile -├── utils/ ← 74 个 .py + __init__ +├── utils/ ← 75 个 .py + __init__ ├── src/ ← 被测系统源码(cov 指向) ├── workspace/ │ ├── 测试计划/ 需求分析/ 测试用例/ 测试数据/ @@ -297,7 +297,7 @@ your-test-project/ | `.claude/skills/*.md`(斜杠技能) | ✅ Claude Code 独有 | 其他工具无对等机制 | | `.mcp.json`(MCP 协议) | 半依赖 | MCP 是开放协议;Claude Desktop / Cursor 部分支持;OpenAI 系也开始支持 | | `Agent` 工具(test-lead 调用子专家) | ✅ Claude Code 独有 | 其他工具用人工编排 / 多 agent 框架替代 | -| `utils/*.py`(74 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 | +| `utils/*.py`(75 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 | | pytest / Playwright / JMeter / Allure | ❌ 跨工具 | 完全可用 | | CI/CD(yml / groovy) | ❌ 跨工具 | 完全可用 | | conftest.py / .env / requirements.txt | ❌ 标准 Python | 完全可用 | diff --git a/docs/charter/07-runtime-license.md b/docs/charter/07-runtime-license.md index dc93c10..cdd38e3 100644 --- a/docs/charter/07-runtime-license.md +++ b/docs/charter/07-runtime-license.md @@ -6,7 +6,7 @@ ## 🧠 V1.36.0 运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill / 74 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 +> 已有 16 专家 / 32 Skill / 75 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 > 
让"文档+脚本工具箱"升级为"可被 API/CLI/CI 直接调用的运行时"。 ### 模块拓扑 @@ -104,13 +104,13 @@ uvicorn runtime.api.main:app --port 8800 - 5 类典型输入(web/api/mobile/desktop/ai-model)stub 路由 = 5/5(100%) - M1 门槛:多模型真测 ≥85%;不达 → 双模型投票 -### 与 16 专家 / 32 Skill / 74 utils 的关系 +### 与 16 专家 / 32 Skill / 75 utils 的关系 | 项 | 关系 | |----|------| | 16 专家 `.md` | **不动**。`registry` 扫 frontmatter,`router` 喂 LLM 选用 | | 32 Skill `.md` | **不动**。同上 | -| 74 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | +| 75 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | | `utils/` 通知/Bug | 复用 `generate_report.py` / `zentao_bug_manager.py` | 任何专家/Skill/脚本**新增**或**修改**仍按宪章 §1 同步铁律走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 @@ -155,7 +155,7 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ### 来源与引用(认知史) - 第一至五轮(DeepSeek + Claude):测试 Agent 架构 + 九大簇 -- V1.0.0 工程基线:14 agent + 14 skill + 74 utils + CI/CD(历史基线) +- V1.0.0 工程基线:14 agent + 14 skill + 75 utils + CI/CD(历史基线) - V1.1.0 ~ V1.36.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) - 永久宪章糅合(2026-05-11/14/16):FULL_GUIDE 工程主体 + 全局记忆哲学维度 + 持续回写 diff --git a/examples/web-demo/README.md b/examples/web-demo/README.md index a6bef72..463ec17 100644 --- a/examples/web-demo/README.md +++ b/examples/web-demo/README.md @@ -71,7 +71,7 @@ examples/web-demo/ | 完整工作流 | 本 demo | |-----------|---------| -| 16 Agent + 32 Skill + 74 utils | 仅 pytest + playwright | +| 16 Agent + 32 Skill + 75 utils | 仅 pytest + playwright | | `.env` 配置 8 必填 | 不需 `.env` | | Allure / JMeter / BugTracker 集成 | 不集成 | | 冒烟 + 回归 + 性能门禁 | 仅 1 冒烟用例 | diff --git a/runtime/tests/test_utils_fairness.py b/runtime/tests/test_utils_fairness.py new file mode 100644 index 0000000..a83d7c3 --- /dev/null +++ b/runtime/tests/test_utils_fairness.py @@ -0,0 +1,286 @@ +# SPDX-License-Identifier: MIT +"""Unit tests for fairness_auditor.py — Phase 3.1 伦理/偏见审计.""" + +from __future__ import annotations + +import json +import sys +from pathlib 
# SPDX-License-Identifier: MIT
"""Unit tests for fairness_auditor.py — Phase 3.1 ethics / bias audit."""

from __future__ import annotations

import json
import sys
from pathlib import Path

import numpy as np
import pytest

# Make the utils directory importable so the tests can load fairness_auditor.
_utils_dir = Path(__file__).resolve().parents[2] / "05-代码示例"
if str(_utils_dir) not in sys.path:
    sys.path.insert(0, str(_utils_dir))


# ---------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------

def _two_equal_groups():
    """Return (labels, group ids): two 100-row groups, each 50 positive / 50 negative."""
    per_group = np.array([1] * 50 + [0] * 50, dtype=float)
    labels = np.concatenate([per_group, per_group])
    group_ids = np.repeat([0, 1], 100)
    return labels, group_ids


def _metric(report, name):
    """Return the first fairness result whose metric is *name* (StopIteration if absent)."""
    return next(res for res in report.fairness_results if res.metric == name)


# ---------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------

@pytest.fixture
def balanced_dataset():
    """Two groups with identical size and identical label rates."""
    return _two_equal_groups()


@pytest.fixture
def biased_dataset():
    """Group 0 is over-represented (160 vs 40 rows) and drawn with a higher positive rate."""
    rng = np.random.RandomState(42)
    size_a, size_b = 160, 40  # 80/20 split
    labels_a = rng.choice([0, 1], size_a, p=[0.3, 0.7])  # ~70% positive
    labels_b = rng.choice([0, 1], size_b, p=[0.7, 0.3])  # ~30% positive
    labels = np.concatenate([labels_a, labels_b]).astype(float)
    group_ids = np.array([0] * size_a + [1] * size_b)
    return labels, group_ids


@pytest.fixture
def fair_predictions():
    """Perfect predictions over two identically distributed groups."""
    labels, group_ids = _two_equal_groups()
    predictions = labels.copy()  # every prediction equals its label
    return labels, predictions, group_ids


@pytest.fixture
def biased_predictions():
    """Group 0 is predicted perfectly; group 1 has ~30% of its positives flipped to 0."""
    rng = np.random.RandomState(42)
    size_a, size_b = 100, 100
    # The order of the rng calls below fixes the generated data; keep it.
    truth_a = rng.randint(0, 2, size_a).astype(float)
    pred_a = truth_a.copy()
    truth_b = rng.randint(0, 2, size_b).astype(float)
    pred_b = truth_b.copy()
    flipped = (truth_b == 1) & (rng.random(size_b) < 0.3)
    pred_b[flipped] = 0
    labels = np.concatenate([truth_a, truth_b]).astype(float)
    predictions = np.concatenate([pred_a, pred_b]).astype(float)
    group_ids = np.array([0] * size_a + [1] * size_b)
    return labels, predictions, group_ids


# ---------------------------------------------------------------
# Dataset bias tests
# ---------------------------------------------------------------

class TestAuditDatasetBias:
    def test_balanced_dataset_passes(self, balanced_dataset):
        from fairness_auditor import audit_dataset_bias

        labels, group_ids = balanced_dataset
        report = audit_dataset_bias(labels, group_ids, group_names=["A", "B"])
        assert report.overall_severity == "pass"
        assert report.source == "dataset"

    def test_biased_dataset_detects_representation_gap(self, biased_dataset):
        from fairness_auditor import audit_dataset_bias

        labels, group_ids = biased_dataset
        report = audit_dataset_bias(
            labels,
            group_ids,
            group_names=["A", "B"],
            representation_threshold=0.15,
        )
        assert report.overall_severity in ("warning", "fail")
        assert not _metric(report, "representation_parity").passed

    def test_biased_dataset_detects_label_imbalance(self, biased_dataset):
        from fairness_auditor import audit_dataset_bias

        labels, group_ids = biased_dataset
        report = audit_dataset_bias(labels, group_ids, group_names=["A", "B"])
        assert not _metric(report, "label_balance").passed

    def test_recommendations_generated_for_biased(self, biased_dataset):
        from fairness_auditor import audit_dataset_bias

        labels, group_ids = biased_dataset
        report = audit_dataset_bias(labels, group_ids, group_names=["A", "B"])
        assert len(report.recommendations) > 0

    def test_mismatched_group_names_raises(self, balanced_dataset):
        from fairness_auditor import audit_dataset_bias

        labels, group_ids = balanced_dataset
        with pytest.raises(ValueError):
            audit_dataset_bias(labels, group_ids, group_names=["only_one"])

    def test_repr_custom_threshold(self, biased_dataset):
        from fairness_auditor import audit_dataset_bias

        labels, group_ids = biased_dataset
        # A very permissive threshold should let the representation check pass.
        report = audit_dataset_bias(
            labels,
            group_ids,
            group_names=["A", "B"],
            representation_threshold=0.5,
        )
        assert _metric(report, "representation_parity").passed


# ---------------------------------------------------------------
# Model fairness tests
# ---------------------------------------------------------------

class TestAuditModelFairness:
    def test_perfect_predictions_pass_all_metrics(self, fair_predictions):
        from fairness_auditor import audit_model_fairness

        labels, predictions, group_ids = fair_predictions
        report = audit_model_fairness(
            labels, predictions, group_ids, group_names=["A", "B"]
        )
        assert report.overall_severity == "pass"
        assert all(res.passed for res in report.fairness_results)

    def test_biased_predictions_detected(self, biased_predictions):
        from fairness_auditor import audit_model_fairness

        labels, predictions, group_ids = biased_predictions
        report = audit_model_fairness(
            labels, predictions, group_ids, group_names=["A", "B"]
        )
        # At least equal_opportunity is expected to fail (TPR gap).
        assert report.overall_severity in ("warning", "fail")

    def test_disparate_impact_computed(self, fair_predictions):
        from fairness_auditor import audit_model_fairness

        labels, predictions, group_ids = fair_predictions
        report = audit_model_fairness(
            labels, predictions, group_ids, group_names=["A", "B"]
        )
        impact = _metric(report, "disparate_impact")
        assert impact.value > 0.0
        assert impact.value <= 1.0

    def test_group_metrics_populated(self, fair_predictions):
        from fairness_auditor import audit_model_fairness

        labels, predictions, group_ids = fair_predictions
        report = audit_model_fairness(
            labels, predictions, group_ids, group_names=["X", "Y"]
        )
        assert len(report.groups) == 2
        for group in report.groups:
            assert group.count > 0
            assert group.tpr is not None
            assert group.fpr is not None

    def test_all_6_metrics_present(self, biased_predictions):
        from fairness_auditor import audit_model_fairness

        labels, predictions, group_ids = biased_predictions
        report = audit_model_fairness(
            labels, predictions, group_ids, group_names=["A", "B"]
        )
        reported = {res.metric for res in report.fairness_results}
        required = {
            "disparate_impact",
            "statistical_parity_difference",
            "equal_opportunity",
            "equalized_odds",
            "calibration_parity",
            "predictive_parity",
        }
        assert required.issubset(reported)


# ---------------------------------------------------------------
# Intersectional fairness tests
# ---------------------------------------------------------------

class TestAuditIntersectional:
    @pytest.fixture
    def intersectional_data(self):
        rng = np.random.RandomState(42)
        total = 200
        labels = rng.randint(0, 2, total).astype(float)
        # Gender: first 100 rows are 0, last 100 rows are 1.
        gender = np.repeat([0, 1], 100)
        # Race by row block: 60 x 0, 40 x 1, 50 x 0, 50 x 1.
        race = np.repeat([0, 1, 0, 1], [60, 40, 50, 50])
        predictions = labels.copy()
        # Bias: rows with gender=1 and race=1 get randomly drawn predictions.
        worse = (gender == 1) & (race == 1)
        predictions[worse] = rng.choice([0, 1], worse.sum(), p=[0.4, 0.6])
        return labels, predictions, {"gender": gender, "race": race}

    def test_intersectional_groups_created(self, intersectional_data):
        from fairness_auditor import audit_intersectional

        labels, predictions, attributes = intersectional_data
        report = audit_intersectional(
            labels, predictions, attributes, min_group_size=5
        )
        assert len(report.groups) >= 2

    def test_intersectional_metrics_present(self, intersectional_data):
        from fairness_auditor import audit_intersectional

        labels, predictions, attributes = intersectional_data
        report = audit_intersectional(
            labels, predictions, attributes, min_group_size=5
        )
        reported = {res.metric for res in report.fairness_results}
        assert "intersectional_disparate_impact" in reported
        assert "intersectional_accuracy_gap" in reported

    def test_small_groups_filtered(self, intersectional_data):
        from fairness_auditor import audit_intersectional

        labels, predictions, attributes = intersectional_data
        # min_group_size larger than the dataset: every group must be dropped.
        report = audit_intersectional(
            labels, predictions, attributes, min_group_size=1000
        )
        assert report.overall_severity == "pass"
        assert len(report.groups) == 0


# ---------------------------------------------------------------
# Decision fairness tests
# ---------------------------------------------------------------

class TestAuditDecisionFairness:
    def test_fair_decisions_pass(self):
        from fairness_auditor import audit_decision_fairness

        rng = np.random.RandomState(42)
        decisions = rng.choice([0, 1], 200, p=[0.5, 0.5]).astype(float)
        group_ids = np.array([0] * 100 + [1] * 100)
        report = audit_decision_fairness(
            decisions, group_ids, group_names=["A", "B"]
        )
        # Random decisions over equal groups should land close to fair.
        assert report.overall_severity in ("pass", "warning")

    def test_biased_decisions_detected(self):
        from fairness_auditor import audit_decision_fairness

        rng = np.random.RandomState(42)
        # Group 0 drawn at ~80% approval, group 1 at ~20% approval.
        approved_a = rng.choice([0, 1], 100, p=[0.2, 0.8]).astype(float)
        approved_b = rng.choice([0, 1], 100, p=[0.8, 0.2]).astype(float)
        decisions = np.concatenate([approved_a, approved_b])
        group_ids = np.array([0] * 100 + [1] * 100)
        report = audit_decision_fairness(
            decisions, group_ids, group_names=["A", "B"]
        )
        assert report.overall_severity == "fail"

    def test_decision_groups_match(self):
        from fairness_auditor import audit_decision_fairness

        decisions = np.array([1, 1, 0, 0, 1, 0])
        group_ids = np.array([0, 0, 0, 1, 1, 1])
        report = audit_decision_fairness(
            decisions, group_ids, group_names=["X", "Y"]
        )
        assert len(report.groups) == 2
        assert report.groups[0].count == 3
        assert report.groups[1].count == 3


# ---------------------------------------------------------------
# Export and summary tests
# ---------------------------------------------------------------

class TestExport:
    def test_export_creates_file(self, balanced_dataset, tmp_path):
        from fairness_auditor import audit_dataset_bias, export_bias_report

        labels, group_ids = balanced_dataset
        report = audit_dataset_bias(labels, group_ids, group_names=["A", "B"])
        written = Path(export_bias_report(report, output_dir=str(tmp_path)))
        assert written.exists()
        payload = json.loads(written.read_text(encoding="utf-8"))
        assert payload["overall_severity"] == "pass"
        assert payload["source"] == "dataset"
        assert len(payload["fairness_results"]) == 2

    def test_summary_contains_metrics(self, fair_predictions):
        from fairness_auditor import audit_model_fairness, summary

        labels, predictions, group_ids = fair_predictions
        report = audit_model_fairness(
            labels, predictions, group_ids, group_names=["A", "B"]
        )
        rendered = summary(report)
        assert "disparate_impact" in rendered
        assert "equal_opportunity" in rendered

    def test_summary_shows_severity(self, fair_predictions):
        from fairness_auditor import audit_model_fairness, summary

        labels, predictions, group_ids = fair_predictions
        report = audit_model_fairness(
            labels, predictions, group_ids, group_names=["A", "B"]
        )
        rendered = summary(report)
        assert "PASS" in rendered
detect_threshold_drift(): per-metric drift analysis - batch_detect(): multi-metric unified report - Source collectors: tracing/web_vitals/prometheus_counter/prometheus_gauge - SlidingWindowStore: rolling window for trend analysis - export_report() + ci_summary() for CI integration - ai_validator.run_silent_failure_audit(): integrated pipeline - 21 unit tests (runtime/tests/test_utils_silent_failure.py) Changed: - Utils: 75 → 76 - pre-commit hook: expected utils 75 → 76 - coverage matrix: 沉默故障检测 ✅ (was Phase 3) - vision-dimensions: 沉默故障探测器 ✅ - ROADMAP: V1.38.0 + V1.39.0 entries - All docs: 75→76 utils Tests: 224 passed, 2 skipped --- .pre-commit-config.yaml | 2 +- ...71\347\233\256\345\257\274\350\210\252.md" | 2 +- ...77\347\224\250\346\211\213\345\206\214.md" | 4 +- ...50\347\275\262\350\257\264\346\230\216.md" | 2 +- ...15\347\275\256\346\270\205\345\215\225.md" | 2 +- .../ai_validator.py" | 49 ++ .../silent_failure_detector.py" | 441 ++++++++++++++++++ CHANGELOG.md | 6 +- README.md | 4 +- ROADMAP.md | 1 + docs/charter/01-vision-dimensions.md | 3 +- docs/charter/02-coverage-matrix.md | 2 +- docs/charter/05-install-deploy.md | 4 +- docs/charter/07-runtime-license.md | 8 +- examples/web-demo/README.md | 2 +- runtime/tests/test_utils_silent_failure.py | 238 ++++++++++ 16 files changed, 752 insertions(+), 18 deletions(-) create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/silent_failure_detector.py" create mode 100644 runtime/tests/test_utils_silent_failure.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 017b984..faa2b22 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,7 +65,7 @@ repos: # 文件统计校验(防误删 agent/skill/utils) - id: file-count-check name: Agents/Skills/Utils 数量校验 - entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ 
"$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "75" ] || { echo "❌ Utils 数量异常(期 75,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' + entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "76" ] || { echo "❌ Utils 数量异常(期 76,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' language: system pass_filenames: false always_run: true diff --git "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" index d2495f9..3f166aa 100644 --- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" +++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" @@ -264,7 +264,7 @@ - **32 业务 Skill + 3 元 Skill**(总 35 单元) - 业务:通用 8 + 平台专项 5 + 渗透 7 + 车载 5 + ECC 加固 6 + 探索 1 - 元(子目录):`darwin-skill/` `karpathy-guidelines/` `nuwa-skill/` -- **75 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) +- **76 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) - **20+ 协议**(HTTP/HTTPS/WS/gRPC/TCP/UDP/GraphQL/SOAP/Modbus/MQTT/SSH/串口/Kafka/RabbitMQ/Jaeger 等) - **9 PRD 格式**(md/txt/pdf/docx/xlsx/zip/img/html/url) - **覆盖率 ~90%**(高度专业合规领域:航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508 等,业务方按需自加) diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" index c1e256c..0d7ea91 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" +++ 
"b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -13,7 +13,7 @@ Test-Agent/ ─部署→ your-test-project/ ├── 02-专家定义/ ├── .claude/agents/ ├── 03-技能定义/ ├── .claude/skills/ ├── 04-配置文件/ ├── conftest.py / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ├── utils/(75 个 .py) +├── 05-代码示例/ ├── utils/(76 个 .py) └── 06-CICD集成/ ├── .github/workflows/test.yml + Jenkinsfile ├── src/ ← 你的被测系统源码 └── workspace/ ← 测试产出 @@ -57,7 +57,7 @@ bash Test-Agent/install.sh /path/to/your-test-project 2. clone 模板到临时目录 3. 装 Claude Code(如未装) 4. 创建项目目录结构(含 `测试计划/` 等 13 个 workflow 子目录) -5. 拷贝 16 expert + 32 skill + 74 utils + 配置 + CI/CD 文件 +5. 拷贝 16 expert + 32 skill + 76 utils + 配置 + CI/CD 文件 6. 创建 `.venv` + 装 requirements + playwright chromium 7. 复制 `.env.example` → `.env`(已存在则跳过) diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" index eeed32f..775b40f 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" @@ -149,7 +149,7 @@ if (-not (Test-Path "$PROJECT_ROOT\.env")) { Copy-Item "$TEMPLATE_DIR\04-配置文件\.env.example" "$PROJECT_ROOT\.env" -Force } -# 拷贝 utils(75 个 .py + __init__.py) +# 拷贝 utils(76 个 .py + __init__.py) $utils = @( "__init__.py", "api_retry_util.py", "data_factory.py", "data_masking.py", "excel_generator.py", "flaky_detector.py", diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" index 9b22744..59dc43f 100644 --- 
"a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" @@ -311,7 +311,7 @@ test -f conftest.py && echo "✅ conftest.py" || echo "❌" test -f pytest.ini && echo "✅ pytest.ini" || echo "❌" # 4. utils 完整性 -ls utils/*.py | wc -l # 应该 67 个 +ls utils/*.py | wc -l # 应该 76 个 # 5. agent / skill ls .claude/agents/*.md | wc -l # 14 个 diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" index 76c0e18..7e8ed36 100644 --- "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" +++ "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" @@ -160,6 +160,55 @@ def run_bias_audit(dataset: str, sensitive_attrs: list[str], endpoint: str, } +def run_silent_failure_audit( + output_dir: str = "workspace/执行日志/ai-silent-failure", + tracing_log: Optional[str] = None, + web_vitals_log: Optional[str] = None, + prometheus_counter_log: Optional[str] = None, + prometheus_gauge_log: Optional[str] = None, + custom_configs: Optional[List] = None, +) -> Dict: + """Run silent failure detection across all data sources and return summary dict.""" + from silent_failure_detector import ( + batch_detect, + collect_from_tracing, + collect_from_web_vitals, + collect_from_prometheus_counter, + collect_from_prometheus_gauge, + export_report, + ci_summary, + ) + + configs: list = [] + + if tracing_log: + configs.extend(collect_from_tracing(tracing_log)) + if web_vitals_log: + configs.extend(collect_from_web_vitals(web_vitals_log)) + if prometheus_counter_log: + configs.extend(collect_from_prometheus_counter(prometheus_counter_log)) + if prometheus_gauge_log: + configs.extend(collect_from_prometheus_gauge(prometheus_gauge_log)) + if custom_configs: + configs.extend(custom_configs) + + if not configs: + logger.info("No 
# SPDX-License-Identifier: MIT
"""
Silent Failure Detector — silent failure detection (Phase 3.2).

Detects degradations that stay below alert thresholds: slow latency creep,
error-rate drift, pass-rate erosion, metric baseline shift. Catches what
traditional threshold-based alerting misses.

Integrates with:
  - tracing_validator.py (Jaeger trace latency/duration trends)
  - web_vitals_collector.py (LCP/FID/CLS drift over releases)
  - prometheus_metrics.py (run duration, error rate, pass rate trends)
  - dora_tracker.py (rework rate, MTTR drift)

Referenced by: the 07 test-execution expert (`07-测试执行`) and
02-coverage-matrix Phase 3.

NOTE(review): `ai_validator.run_silent_failure_audit` passes log-path strings
to the `collect_from_*` helpers and `extend`s a config list with their return
value. The helpers in this module take numeric series and return a single
`DriftResult`, while `batch_detect` consumes `MetricConfig` objects, so that
caller needs an adapter that loads the series and builds `MetricConfig`s.
"""

from __future__ import annotations

import json
import logging
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from math import erfc, sqrt
from pathlib import Path
from typing import Any

import numpy as np

logger = logging.getLogger(__name__)

# ═══════════════════════════════════════════════════════════════
# Data structures
# ═══════════════════════════════════════════════════════════════


@dataclass
class MetricPoint:
    """One timestamped sample of a metric."""

    timestamp: float
    value: float
    label: str = ""


@dataclass
class DriftResult:
    """Outcome of drift analysis for a single metric series."""

    metric_name: str
    source: str  # "tracing" | "web_vitals" | "prometheus" | "custom"
    window_size: int  # number of data points analyzed
    current_mean: float  # mean of the whole analyzed window
    baseline_mean: float
    mean_shift_pct: float  # |current - baseline| / |baseline|, as a fraction (0.15 == 15%)
    trend_slope: float  # linear regression slope (per point)
    trend_pvalue: float | None  # Mann-Kendall p-value; None when fewer than 3 points
    severity: str  # "silent" | "impending" | "breached"
    threshold: float
    threshold_margin_pct: float  # |distance to threshold| / |threshold|, as a fraction
    recommendation: str
    detail: dict[str, Any] = field(default_factory=dict)


@dataclass
class SilentFailureReport:
    """Aggregated result of a multi-metric scan."""

    source: str
    checked_at: str  # ISO 8601
    n_metrics: int
    results: list[DriftResult]
    silent_count: int = 0
    impending_count: int = 0
    breached_count: int = 0
    overall_severity: str = "pass"  # "pass" | "warning" | "fail"
    summary_lines: list[str] = field(default_factory=list)


# ═══════════════════════════════════════════════════════════════
# Core: threshold drift detection
# ═══════════════════════════════════════════════════════════════


def _linear_trend(values: np.ndarray) -> float:
    """Return the ordinary-least-squares slope of *values* per index step.

    Fewer than 3 points, or a degenerate x-variance, yields 0.0.
    """
    if len(values) < 3:
        return 0.0
    x = np.arange(len(values), dtype=float)
    x_centered = x - x.mean()
    num = (x_centered * (values - values.mean())).sum()
    den = (x_centered ** 2).sum()
    return float(num / den) if den > 0 else 0.0


def _mann_kendall(values: np.ndarray) -> float:
    """Return the approximate two-sided p-value of the Mann-Kendall trend test.

    Uses the normal approximation with continuity correction. The variance of
    S carries the standard tie correction, so series with repeated values
    (pass rates, bucketed latencies) are not scored too conservatively and a
    constant series hits the zero-variance guard. Fewer than 4 points -> 1.0.
    """
    values = np.asarray(values, dtype=float)
    n = len(values)
    if n < 4:
        return 1.0

    # S = sum over i<j of sign(x_j - x_i); one vectorised pass per row keeps
    # memory linear while avoiding the quadratic pure-Python inner loop.
    s = 0.0
    for i in range(n - 1):
        s += float(np.sign(values[i + 1:] - values[i]).sum())

    _, tie_counts = np.unique(values, return_counts=True)
    tie_term = float((tie_counts * (tie_counts - 1) * (2 * tie_counts + 5)).sum())
    var_s = (n * (n - 1) * (2 * n + 5) - tie_term) / 18.0
    if var_s <= 0:
        # All values identical: no trend can be claimed.
        return 1.0

    z = (s - np.sign(s)) / sqrt(var_s)
    # 2 * (1 - Phi(|z|)) == erfc(|z| / sqrt(2))
    return float(erfc(abs(z) / sqrt(2.0)))


def _threshold_margin(current_mean: float, threshold: float,
                      direction: str) -> tuple[float, bool]:
    """Return (signed margin as a fraction of |threshold|, breached flag).

    Any direction other than "above" is treated as "below". The margin is
    positive while the metric is on the healthy side of the threshold.
    """
    scale = max(abs(threshold), 1e-9)  # abs(): negative thresholds must not collapse to 1e-9
    if direction == "above":
        return (threshold - current_mean) / scale, current_mean >= threshold
    return (current_mean - threshold) / scale, current_mean <= threshold


def _breach_message(metric_name: str, current_mean: float, threshold: float) -> str:
    """Build the recommendation text used for a breached metric."""
    return (
        f"{metric_name} has breached threshold ({current_mean:.3f} vs {threshold}). "
        "Immediate investigation required."
    )


def detect_threshold_drift(
    metric_name: str,
    points: list[float],
    threshold: float,
    *,
    source: str = "custom",
    direction: str = "above",  # "above" = bad when > threshold, "below" = bad when < threshold
    baseline_points: list[float] | None = None,
    drift_pct_threshold: float = 0.15,  # 15% shift from baseline = warning
    margin_pct: float = 0.2,  # within 20% of threshold = impending
) -> DriftResult:
    """
    Detect silent threshold drift from a time series.

    Args:
        metric_name: human-readable name (e.g. "API P95 latency")
        points: most recent metric values (time-ordered, oldest→newest)
        threshold: alert threshold value
        source: free-form origin tag copied into the result
        direction: "above" (bad if above threshold, e.g. latency/errors)
                   or "below" (bad if below threshold, e.g. pass rate/coverage);
                   any other value is treated like "below"
        baseline_points: historical baseline (optional; if None or empty, the
                         first half of `points` is used)
        drift_pct_threshold: mean shift (fraction) that counts as drift
        margin_pct: fraction of the threshold considered the "impending" zone

    Returns:
        A DriftResult. Severity is "breached" when the window mean is at or
        past the threshold, "impending" when drift is detected within
        `margin_pct` of the threshold, otherwise "silent" (which covers both
        "stable" and "drifting but far from the threshold" — read the
        recommendation text to tell them apart).

    NOTE(review): when no baseline is supplied, `current_mean` is the mean of
    the whole window, which includes the baseline half, so the reported shift
    understates the shift of the recent half. Left unchanged here because it
    alters every reported number.
    """
    arr = np.asarray(points, dtype=float)
    n = len(arr)

    if n < 3:
        current_mean = float(arr.mean()) if n > 0 else 0.0
        margin, breached = _threshold_margin(current_mean, threshold, direction)
        if n > 0 and breached:
            # Too few points for a trend, but a breach must never be
            # reported as "silent".
            return DriftResult(
                metric_name=metric_name, source=source, window_size=n,
                current_mean=current_mean,
                baseline_mean=0.0, mean_shift_pct=0.0,
                trend_slope=0.0, trend_pvalue=None,
                severity="breached", threshold=threshold,
                threshold_margin_pct=round(abs(margin), 4),
                recommendation=_breach_message(metric_name, current_mean, threshold),
            )
        return DriftResult(
            metric_name=metric_name, source=source, window_size=n,
            current_mean=current_mean,
            baseline_mean=0.0, mean_shift_pct=0.0,
            trend_slope=0.0, trend_pvalue=None,
            severity="silent", threshold=threshold,
            threshold_margin_pct=1.0,
            recommendation="Insufficient data points (<3); collect more metrics.",
        )

    if baseline_points is not None and len(baseline_points) > 0:
        baseline_arr = np.asarray(baseline_points, dtype=float)
    else:
        baseline_arr = arr[:max(n // 2, 1)]

    current_mean = float(arr.mean())
    baseline_mean = float(baseline_arr.mean()) if len(baseline_arr) > 0 else current_mean
    mean_shift = abs(current_mean - baseline_mean) / max(abs(baseline_mean), 1e-9)

    slope = _linear_trend(arr)
    mk_pvalue = _mann_kendall(arr)

    # Severity classification
    margin, breached = _threshold_margin(current_mean, threshold, direction)
    drifting = mean_shift >= drift_pct_threshold or mk_pvalue < 0.05

    if breached:
        severity = "breached"
        recommendation = _breach_message(metric_name, current_mean, threshold)
    elif drifting and margin <= margin_pct:
        severity = "impending"
        recommendation = (
            f"{metric_name} trending toward threshold (margin={margin:.1%}, "
            f"shift={mean_shift:.1%}). Schedule investigation this sprint."
        )
    elif drifting:
        severity = "silent"
        recommendation = (
            f"{metric_name} shows statistically significant drift "
            f"(shift={mean_shift:.1%}, p={mk_pvalue:.4f}) but remains "
            f"well within threshold. Monitor weekly."
        )
    else:
        severity = "silent"
        recommendation = f"{metric_name} is stable. No drift detected."

    return DriftResult(
        metric_name=metric_name,
        source=source,
        window_size=n,
        current_mean=round(current_mean, 4),
        baseline_mean=round(baseline_mean, 4),
        mean_shift_pct=round(mean_shift, 4),
        trend_slope=round(slope, 6),
        trend_pvalue=round(mk_pvalue, 4),
        severity=severity,
        threshold=threshold,
        threshold_margin_pct=round(abs(margin), 4),
        recommendation=recommendation,
        detail={
            "direction": direction,
            "latest_value": float(arr[-1]),
            "min": float(arr.min()),
            "max": float(arr.max()),
            "std": float(arr.std()),
        },
    )


# ═══════════════════════════════════════════════════════════════
# Multi-metric batch detection
# ═══════════════════════════════════════════════════════════════


@dataclass
class MetricConfig:
    """Input description of one metric series for `batch_detect`."""

    name: str
    source: str
    points: list[float]
    threshold: float
    direction: str = "above"
    baseline_points: list[float] | None = None


def batch_detect(configs: list[MetricConfig]) -> SilentFailureReport:
    """Run drift detection across multiple metrics and produce a unified report.

    Overall severity is "fail" if any metric is breached, "warning" if any is
    impending, otherwise "pass". Default drift/margin thresholds are used.
    """
    results: list[DriftResult] = [
        detect_threshold_drift(
            metric_name=cfg.name,
            points=cfg.points,
            threshold=cfg.threshold,
            source=cfg.source,
            direction=cfg.direction,
            baseline_points=cfg.baseline_points,
        )
        for cfg in configs
    ]

    silent = sum(1 for r in results if r.severity == "silent")
    impending = sum(1 for r in results if r.severity == "impending")
    breached = sum(1 for r in results if r.severity == "breached")

    severity = "pass"
    if breached > 0:
        severity = "fail"
    elif impending > 0:
        severity = "warning"

    summary = [
        f"Silent Failure Scan: {len(results)} metrics checked",
        f" Silent (stable): {silent}",
        f" Impending (drift): {impending}",
        f" Breached (alert): {breached}",
        f" Overall: {severity.upper()}",
    ]
    summary.extend(
        f" ! {r.metric_name}: {r.severity} — {r.recommendation}"
        for r in results
        if r.severity != "silent"
    )

    return SilentFailureReport(
        source="batch",
        checked_at=datetime.now(timezone.utc).isoformat(),
        n_metrics=len(results),
        results=results,
        silent_count=silent,
        impending_count=impending,
        breached_count=breached,
        overall_severity=severity,
        summary_lines=summary,
    )


# ═══════════════════════════════════════════════════════════════
# Source-specific collectors
# ═══════════════════════════════════════════════════════════════


def collect_from_tracing(
    trace_durations_ms: list[float],
    threshold_ms: float = 500.0,
    baseline_ms: list[float] | None = None,
) -> DriftResult:
    """Detect latency drift from trace durations (feed from Jaeger/Zipkin).

    The values are analysed as given (window mean and trend); no percentile
    is computed here despite the "p95" in the reported metric name.
    """
    return detect_threshold_drift(
        metric_name="trace_duration_p95_ms",
        points=trace_durations_ms,
        threshold=threshold_ms,
        source="tracing",
        direction="above",
        baseline_points=baseline_ms,
    )


def collect_from_web_vitals(
    metric_name: str,
    values: list[float],
    threshold: float,
    baseline: list[float] | None = None,
) -> DriftResult:
    """
    Detect web vitals drift (LCP/FID/CLS/FCP/TTFB/INP).

    threshold: "poor" boundary from web_vitals_collector.WEB_VITALS_THRESHOLDS.
    """
    return detect_threshold_drift(
        metric_name=f"web_vital_{metric_name}",
        points=values,
        threshold=threshold,
        source="web_vitals",
        direction="above",
        baseline_points=baseline,
    )


def collect_from_prometheus_counter(
    metric_name: str,
    values: list[float],
    threshold: float = 10.0,
    baseline: list[float] | None = None,
) -> DriftResult:
    """Detect error rate drift from Prometheus counter metrics."""
    return detect_threshold_drift(
        metric_name=f"prom_{metric_name}",
        points=values,
        threshold=threshold,
        source="prometheus",
        direction="above",
        baseline_points=baseline,
    )


def collect_from_prometheus_gauge(
    metric_name: str,
    values: list[float],
    threshold: float,
    direction: str = "below",
    baseline: list[float] | None = None,
) -> DriftResult:
    """
    Detect gauge drift (pass rate, active runs, circuit breaker).

    direction: "below" for pass rate (bad when below), "above" for others.
    """
    return detect_threshold_drift(
        metric_name=f"prom_{metric_name}",
        points=values,
        threshold=threshold,
        source="prometheus",
        direction=direction,
        baseline_points=baseline,
    )


# ═══════════════════════════════════════════════════════════════
# Time-window utilities
# ═══════════════════════════════════════════════════════════════


class SlidingWindowStore:
    """Store metric values in per-name rolling windows for trend analysis."""

    def __init__(self, max_points: int = 200):
        self._windows: dict[str, deque[float]] = {}
        self._max = max_points

    def push(self, name: str, value: float) -> None:
        """Append *value* to the window for *name*, evicting the oldest when full."""
        if name not in self._windows:
            self._windows[name] = deque(maxlen=self._max)
        self._windows[name].append(value)

    def get(self, name: str) -> list[float]:
        """Return a copy of the window for *name* (empty list if unknown)."""
        return list(self._windows.get(name, []))

    def get_all(self) -> dict[str, list[float]]:
        """Return a copy of every window keyed by metric name."""
        return {k: list(v) for k, v in self._windows.items()}

    def clear(self, name: str | None = None) -> None:
        """Drop the window for *name*, or every window when *name* is None."""
        # `is not None`, not truthiness: clear("") must only drop the "" window.
        if name is not None:
            self._windows.pop(name, None)
        else:
            self._windows.clear()

    def __len__(self) -> int:
        """Total number of stored values across all windows."""
        return sum(len(v) for v in self._windows.values())


# ═══════════════════════════════════════════════════════════════
# Report export
# ═══════════════════════════════════════════════════════════════


def export_report(report: SilentFailureReport,
                  output_dir: str = "workspace/执行日志/silent-failures") -> str:
    """Export SilentFailureReport as JSON and return the written file path.

    The directory is created if needed. The file name carries a
    second-resolution local timestamp, so two exports within the same second
    write to the same file.
    """
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    path = Path(output_dir) / f"silent_failure_{ts}.json"

    data = {
        "source": report.source,
        "checked_at": report.checked_at,
        "n_metrics": report.n_metrics,
        "overall_severity": report.overall_severity,
        "counts": {
            "silent": report.silent_count,
            "impending": report.impending_count,
            "breached": report.breached_count,
        },
        "results": [
            {
                "metric_name": r.metric_name,
                "source": r.source,
                "severity": r.severity,
                "current_mean": r.current_mean,
                "baseline_mean": r.baseline_mean,
                "mean_shift_pct": r.mean_shift_pct,
                "trend_slope": r.trend_slope,
                "trend_pvalue": r.trend_pvalue,
                "threshold": r.threshold,
                "threshold_margin_pct": r.threshold_margin_pct,
                "recommendation": r.recommendation,
                "detail": r.detail,
            }
            for r in report.results
        ],
        "summary_lines": report.summary_lines,
    }
    path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
    logger.info("Silent failure report → %s (severity=%s)", path, report.overall_severity)
    return str(path)


def ci_summary(report: SilentFailureReport) -> str:
    """Return a compact, CI-friendly summary.

    The first line is the headline; one further line follows per non-silent
    metric, plus a closing line when the overall severity is "pass".
    """
    status = {"pass": "PASS", "warning": "WARN", "fail": "FAIL"}
    lines = [
        f" Silent Failures [{status.get(report.overall_severity, report.overall_severity)}] "
        f"{report.n_metrics} metrics scanned"
    ]
    for r in report.results:
        if r.severity != "silent":
            lines.append(f" {r.severity.upper()}: {r.metric_name} — {r.recommendation}")
    if report.overall_severity == "pass":
        lines.append(" No silent failures detected.")
    return "\n".join(lines)
Cookbook: [`04-配置文件/llm-providers.md`](04-配置文件/llm-providers.md) - **BugTracker** — 1 active adapter (Zentao); 5 planned (Jira · GitHub · GitLab · Linear · Webhook, see roadmap) - **6 notify channels** — WeChat Work · Lark/Feishu · DingTalk · Slack · Email · MS Teams @@ -104,7 +104,7 @@ Test-Agent/ ├── 02-专家定义/ ← 16 expert agents (11 production + 5 script, V1.x rollout 收尾) ├── 03-技能定义/ ← 32 business skills (23 production + 7 script + 0 rollout + 2 vision) + 3 meta-skills ├── 04-配置文件/ ← conftest / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ← 75 production utils +├── 05-代码示例/ ← 76 production utils ├── 06-CICD集成/ ← GitHub Actions + Jenkins ├── runtime/ ← V1.x runtime layer (router / orchestrator / MCP / web / scheduler / subagent / learning_loop / backends / gateway / tutor / essence_watcher / marketplace) ├── docs/charter/ ← Vision charter (7 split files: vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license) diff --git a/ROADMAP.md b/ROADMAP.md index c2982a9..d8fd236 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -213,4 +213,5 @@ V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地: | V1.36.0 | 2026-05-18 | Remaining 6 deferred modules + CVE-2025-71176 fix + 深度审查65发现全修 | 16 expert + 30/32 active (23 production + 7 script) | | V1.37.0 | 2026-05-18 | Phase 2 charter closure: Bug 5适配器(YAML门禁+按需安装) + HIGH 2(H16/H18) + MEDIUM 4(M12/M14/M15/M19) + contract gate + utils tests | 16 expert + 30/32 active · Phase 2 complete | | V1.38.0 | 2026-05-18 | Phase 3.1 伦理/偏见审计: fairness_auditor.py (dataset bias + 6 model fairness metrics + intersectional + decision audit) + 20 tests + ai_validator bias audit pipeline | 16 expert + 30/32 active · 1/3 Phase 3 done | +| V1.39.0 | 2026-05-18 | Phase 3.2 沉默故障检测: silent_failure_detector.py (threshold drift + Mann-Kendall + OLS trend + sliding window + multi-source batch) + 21 tests + tracing/web_vitals/prometheus collectors | 16 expert + 30/32 active · 2/3 Phase 3 done | | V2.0.0 
| TBD | V2.x 路线图启动 | 16/16 + V2 | diff --git a/docs/charter/01-vision-dimensions.md b/docs/charter/01-vision-dimensions.md index 5873a6d..8aa1fcc 100644 --- a/docs/charter/01-vision-dimensions.md +++ b/docs/charter/01-vision-dimensions.md @@ -25,7 +25,7 @@ | `03-技能定义/` | 32 个 Skill 文件(业务 skill) + 3 个元 Skill 子目录 + README 索引 | 可复用测试技能 | 开发人员 | | `04-配置文件/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 | | `04-配置文件/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 | -| `05-代码示例/` | utils(75 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 | +| `05-代码示例/` | utils(76 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 | | `06-CICD集成/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps | --- @@ -113,6 +113,7 @@ | 缺席者画像生成器 | 强制注入边缘用户场景 | 文明/权力层 | a11y_scanner + i18n_checker + 边缘剧本库 | ⚪ | | 现实缝合力探针 | 测试平台对半真半假内容的免疫 | 社会权力层 | ai_adversarial 扩展 | ⚪ | | 公平性审计器 | 数据集/模型/决策公平性指标 (DI/EO/校准/交叉) | 社会权力层 | fairness_auditor.py | ✅ | +| 沉默故障探测器 | 无报警漂移检测/趋势分析/多源聚合 | 工程层 | silent_failure_detector.py | ✅ | | 末日哨兵 | 计算"这一次就是那一次"概率 | 文明层 | 需监管/学界共识授权,Phase 4 | ❌ | | 神圣性守护器 | 识别宗教/纪念场景的不可亵渎边界 | 簇 9 | i18n_checker 禁忌矩阵扩展 | ❌ | | 精神危机响应器 | 模拟危机状态用户、验证交接路径 | 簇 9 | 缺席者剧本库子集 | ❌ | diff --git a/docs/charter/02-coverage-matrix.md b/docs/charter/02-coverage-matrix.md index 11d0037..9603711 100644 --- a/docs/charter/02-coverage-matrix.md +++ b/docs/charter/02-coverage-matrix.md @@ -55,7 +55,7 @@ | 变异测试(用例有效性) | mutation_runner | testcase-designer | ✅ | | DORA 4 指标 + 缺陷密度 + 套件减重 | dora_metrics + suite_minimizer | bug-manager | ✅ | | 伦理 / 偏见审计(数据集/模型/决策公平性) | fairness_auditor.py + ai_adversarial + suite_minimizer(覆盖偏差) + 公平性指标 | ai-tester | ✅ | -| 沉默故障检测(无报警的恶化) | tracing_validator + web_vitals_collector + 阈值漂移检测 | test-executor | ⚪ Phase 3 | +| 沉默故障检测(无报警的恶化) | silent_failure_detector.py + tracing_validator + web_vitals_collector + 阈值漂移检测 | test-executor | ✅ | | 
决策可回放(任一判断可复现可反驳) | tracing_validator + history 归档 + 模型版本快照 | test-lead | ✅ | | 缺席者场景注入(残障/老年/未成年/未联网/精神危机) | a11y_scanner + i18n_checker + 边缘场景剧本库 | testcase-designer | ⚪ Phase 3 | | 证据链可采信性(司法/审计/监管送审) | dora_metrics + tracing_validator + 决策日志打包 | bug-manager | ⚪ Phase 4 | diff --git a/docs/charter/05-install-deploy.md b/docs/charter/05-install-deploy.md index d44390e..640a244 100644 --- a/docs/charter/05-install-deploy.md +++ b/docs/charter/05-install-deploy.md @@ -248,7 +248,7 @@ your-test-project/ ├── .claude/{agents,skills}/ ← 16 agent + 32 skill(业务) + 3 元 skill ├── .github/workflows/test.yml ├── Jenkinsfile -├── utils/ ← 75 个 .py + __init__ +├── utils/ ← 76 个 .py + __init__ ├── src/ ← 被测系统源码(cov 指向) ├── workspace/ │ ├── 测试计划/ 需求分析/ 测试用例/ 测试数据/ @@ -297,7 +297,7 @@ your-test-project/ | `.claude/skills/*.md`(斜杠技能) | ✅ Claude Code 独有 | 其他工具无对等机制 | | `.mcp.json`(MCP 协议) | 半依赖 | MCP 是开放协议;Claude Desktop / Cursor 部分支持;OpenAI 系也开始支持 | | `Agent` 工具(test-lead 调用子专家) | ✅ Claude Code 独有 | 其他工具用人工编排 / 多 agent 框架替代 | -| `utils/*.py`(75 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 | +| `utils/*.py`(76 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 | | pytest / Playwright / JMeter / Allure | ❌ 跨工具 | 完全可用 | | CI/CD(yml / groovy) | ❌ 跨工具 | 完全可用 | | conftest.py / .env / requirements.txt | ❌ 标准 Python | 完全可用 | diff --git a/docs/charter/07-runtime-license.md b/docs/charter/07-runtime-license.md index cdd38e3..6a92794 100644 --- a/docs/charter/07-runtime-license.md +++ b/docs/charter/07-runtime-license.md @@ -6,7 +6,7 @@ ## 🧠 V1.36.0 运行时层(`runtime/`) -> 已有 16 专家 / 32 Skill / 75 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 +> 已有 16 专家 / 32 Skill / 76 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。 > 让"文档+脚本工具箱"升级为"可被 API/CLI/CI 直接调用的运行时"。 ### 模块拓扑 @@ -104,13 +104,13 @@ uvicorn runtime.api.main:app --port 8800 - 5 类典型输入(web/api/mobile/desktop/ai-model)stub 路由 = 5/5(100%) - M1 门槛:多模型真测 ≥85%;不达 → 双模型投票 -### 与 16 专家 / 32 Skill / 75 utils 的关系 
+### 与 16 专家 / 32 Skill / 76 utils 的关系 | 项 | 关系 | |----|------| | 16 专家 `.md` | **不动**。`registry` 扫 frontmatter,`router` 喂 LLM 选用 | | 32 Skill `.md` | **不动**。同上 | -| 75 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | +| 76 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 | | `utils/` 通知/Bug | 复用 `generate_report.py` / `zentao_bug_manager.py` | 任何专家/Skill/脚本**新增**或**修改**仍按宪章 §1 同步铁律走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。 @@ -155,7 +155,7 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增 ### 来源与引用(认知史) - 第一至五轮(DeepSeek + Claude):测试 Agent 架构 + 九大簇 -- V1.0.0 工程基线:14 agent + 14 skill + 75 utils + CI/CD(历史基线) +- V1.0.0 工程基线:14 agent + 14 skill + 76 utils + CI/CD(历史基线) - V1.1.0 ~ V1.36.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP) - 永久宪章糅合(2026-05-11/14/16):FULL_GUIDE 工程主体 + 全局记忆哲学维度 + 持续回写 diff --git a/examples/web-demo/README.md b/examples/web-demo/README.md index 463ec17..4448281 100644 --- a/examples/web-demo/README.md +++ b/examples/web-demo/README.md @@ -71,7 +71,7 @@ examples/web-demo/ | 完整工作流 | 本 demo | |-----------|---------| -| 16 Agent + 32 Skill + 75 utils | 仅 pytest + playwright | +| 16 Agent + 32 Skill + 76 utils | 仅 pytest + playwright | | `.env` 配置 8 必填 | 不需 `.env` | | Allure / JMeter / BugTracker 集成 | 不集成 | | 冒烟 + 回归 + 性能门禁 | 仅 1 冒烟用例 | diff --git a/runtime/tests/test_utils_silent_failure.py b/runtime/tests/test_utils_silent_failure.py new file mode 100644 index 0000000..c3dfa55 --- /dev/null +++ b/runtime/tests/test_utils_silent_failure.py @@ -0,0 +1,238 @@ +# SPDX-License-Identifier: MIT +"""Unit tests for silent_failure_detector.py — Phase 3.2 沉默故障检测.""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +import numpy as np +import pytest + +_utils_dir = Path(__file__).resolve().parents[2] / "05-代码示例" +if str(_utils_dir) not in sys.path: + sys.path.insert(0, str(_utils_dir)) + + +# 
═══════════════════════════════════════════════════════════════ +# Fixtures +# ═══════════════════════════════════════════════════════════════ + +@pytest.fixture +def stable_data(): + """Stable metric: values centered around 100, no trend.""" + rng = np.random.RandomState(42) + return list(rng.normal(100, 5, 30)) + + +@pytest.fixture +def trending_up_data(): + """Metric trending upward toward threshold 200.""" + rng = np.random.RandomState(42) + base = np.linspace(100, 180, 30) + return list(base + rng.normal(0, 5, 30)) + + +@pytest.fixture +def breached_data(): + """Metric that has crossed threshold 200 (mean ≈ 205, last points well above).""" + rng = np.random.RandomState(42) + base = np.linspace(140, 270, 30) + return list(base + rng.normal(0, 5, 30)) + + +@pytest.fixture +def declining_data(): + """Pass rate declining toward threshold (bad when below).""" + rng = np.random.RandomState(42) + base = np.linspace(0.95, 0.81, 30) + return list(base + rng.normal(0, 0.02, 30)) + + +# ═══════════════════════════════════════════════════════════════ +# Drift detection tests +# ═══════════════════════════════════════════════════════════════ + +class TestDetectThresholdDrift: + def test_stable_data_silent(self, stable_data): + from silent_failure_detector import detect_threshold_drift + r = detect_threshold_drift("test_metric", stable_data, threshold=200) + assert r.severity == "silent" + assert r.trend_pvalue is not None + + def test_trending_up_impending(self, trending_up_data): + from silent_failure_detector import detect_threshold_drift + r = detect_threshold_drift( + "latency_ms", trending_up_data, threshold=200, + drift_pct_threshold=0.10, + ) + # Should be at least "impending" (close to threshold) or "silent" with trend + assert r.severity in ("silent", "impending") + assert r.mean_shift_pct > 0 + + def test_breached_detected(self, breached_data): + from silent_failure_detector import detect_threshold_drift + r = detect_threshold_drift("error_rate", breached_data, 
threshold=200) + assert r.severity == "breached" + + def test_direction_below(self, declining_data): + from silent_failure_detector import detect_threshold_drift + r = detect_threshold_drift( + "pass_rate", declining_data, threshold=0.80, + direction="below", + ) + # Should detect the decline + assert r.severity in ("silent", "impending", "breached") + assert r.current_mean < r.baseline_mean or r.trend_slope < 0 + + def test_insufficient_data(self): + from silent_failure_detector import detect_threshold_drift + r = detect_threshold_drift("sparse", [1.0, 2.0], threshold=10) + assert "Insufficient" in r.recommendation + + def test_baseline_points_used(self, trending_up_data): + from silent_failure_detector import detect_threshold_drift + rng = np.random.RandomState(42) + baseline = list(rng.normal(100, 3, 50)) # stable baseline + r = detect_threshold_drift( + "metric", trending_up_data, threshold=200, + baseline_points=baseline, + ) + assert r.baseline_mean < 105 # baseline should be near 100 + + def test_mann_kendall_detects_trend(self, trending_up_data): + from silent_failure_detector import _mann_kendall + arr = np.asarray(trending_up_data) + p = _mann_kendall(arr) + assert p < 0.05 # strong upward trend + + def test_mann_kendall_no_trend(self, stable_data): + from silent_failure_detector import _mann_kendall + arr = np.asarray(stable_data) + p = _mann_kendall(arr) + assert p > 0.01 # no significant trend (M-K noisy with n=30) + + def test_linear_trend_slope(self, trending_up_data): + from silent_failure_detector import _linear_trend + arr = np.asarray(trending_up_data) + slope = _linear_trend(arr) + assert slope > 0 # upward slope + + +# ═══════════════════════════════════════════════════════════════ +# Batch detection tests +# ═══════════════════════════════════════════════════════════════ + +class TestBatchDetect: + def test_batch_all_stable(self, stable_data): + from silent_failure_detector import MetricConfig, batch_detect + cfgs = [ + MetricConfig("m1", 
"custom", stable_data, 200), + MetricConfig("m2", "custom", stable_data, 200), + ] + report = batch_detect(cfgs) + assert report.overall_severity == "pass" + assert report.silent_count == 2 + + def test_batch_one_breached(self, stable_data, breached_data): + from silent_failure_detector import MetricConfig, batch_detect + cfgs = [ + MetricConfig("stable", "custom", stable_data, 200), + MetricConfig("breached", "custom", breached_data, 200), + ] + report = batch_detect(cfgs) + assert report.overall_severity == "fail" + assert report.breached_count >= 1 + + def test_batch_one_impending(self, stable_data, trending_up_data): + from silent_failure_detector import MetricConfig, batch_detect + cfgs = [ + MetricConfig("stable", "custom", stable_data, 200), + MetricConfig("trending", "custom", trending_up_data, 200), + ] + report = batch_detect(cfgs) + assert report.overall_severity in ("warning", "pass") + + +# ═══════════════════════════════════════════════════════════════ +# Source-specific collector tests +# ═══════════════════════════════════════════════════════════════ + +class TestSourceCollectors: + def test_collect_from_tracing(self, trending_up_data): + from silent_failure_detector import collect_from_tracing + r = collect_from_tracing(trending_up_data, threshold_ms=200) + assert r.source == "tracing" + assert r.metric_name == "trace_duration_p95_ms" + + def test_collect_from_web_vitals(self, trending_up_data): + from silent_failure_detector import collect_from_web_vitals + r = collect_from_web_vitals("LCP_ms", trending_up_data, threshold=4000) + assert r.source == "web_vitals" + assert "LCP_ms" in r.metric_name + + def test_collect_from_prometheus_counter(self, trending_up_data): + from silent_failure_detector import collect_from_prometheus_counter + r = collect_from_prometheus_counter("agent_errors", trending_up_data, threshold=10) + assert r.source == "prometheus" + assert "agent_errors" in r.metric_name + + def test_collect_from_prometheus_gauge_below(self, 
declining_data): + from silent_failure_detector import collect_from_prometheus_gauge + r = collect_from_prometheus_gauge( + "pass_rate", declining_data, threshold=0.80, direction="below", + ) + assert r.source == "prometheus" + + +# ═══════════════════════════════════════════════════════════════ +# Sliding window tests +# ═══════════════════════════════════════════════════════════════ + +class TestSlidingWindow: + def test_push_and_get(self): + from silent_failure_detector import SlidingWindowStore + store = SlidingWindowStore(max_points=5) + for v in [1, 2, 3, 4, 5, 6, 7]: + store.push("latency", v) + vals = store.get("latency") + assert len(vals) == 5 + assert vals == [3, 4, 5, 6, 7] + + def test_get_all(self): + from silent_failure_detector import SlidingWindowStore + store = SlidingWindowStore() + store.push("a", 1) + store.push("a", 2) + store.push("b", 10) + all_data = store.get_all() + assert len(all_data) == 2 + + def test_clear(self): + from silent_failure_detector import SlidingWindowStore + store = SlidingWindowStore() + store.push("x", 1) + store.clear("x") + assert store.get("x") == [] + + +# ═══════════════════════════════════════════════════════════════ +# Export tests +# ═══════════════════════════════════════════════════════════════ + +class TestExport: + def test_export_json(self, stable_data, tmp_path): + from silent_failure_detector import MetricConfig, batch_detect, export_report + report = batch_detect([MetricConfig("m1", "custom", stable_data, 200)]) + path = export_report(report, output_dir=str(tmp_path)) + assert Path(path).exists() + data = json.loads(Path(path).read_text(encoding="utf-8")) + assert data["overall_severity"] == "pass" + + def test_ci_summary(self, stable_data): + from silent_failure_detector import MetricConfig, batch_detect, ci_summary + report = batch_detect([MetricConfig("m1", "custom", stable_data, 200)]) + text = ci_summary(report) + assert "PASS" in text + assert "silent" in text.lower() From 
466187260df1803f942aae93c2bdda7c13d1e780 Mon Sep 17 00:00:00 2001 From: xiaoxing0135 <706015750@qq.com> Date: Tue, 19 May 2026 00:17:09 +0800 Subject: [PATCH 05/19] =?UTF-8?q?feat:=20V1.40.0=20=E2=80=94=20Phase=203?= =?UTF-8?q?=20complete=20(fairness=20+=20silent=20failure=20+=20absentee)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3 — 质量增强三连: - 3.1 fairness_auditor.py: 伦理/偏见审计 (6 metrics + intersectional + decision audit) - 3.2 silent_failure_detector.py: 沉默故障检测 (Mann-Kendall + OLS + sliding window) - 3.3 absentee_scenario_injector.py: 缺席者场景注入 (9 groups × 21 scenarios) Utils: 73 → 77 (fairness + silent_failure + absentee + __init__) Tests: 244 passed, 2 skipped --- .pre-commit-config.yaml | 2 +- ...71\347\233\256\345\257\274\350\210\252.md" | 2 +- ...77\347\224\250\346\211\213\345\206\214.md" | 4 +- ...50\347\275\262\350\257\264\346\230\216.md" | 2 +- .../absentee_scenario_injector.py" | 471 ++++++++++++++++++ CHANGELOG.md | 6 +- README.md | 4 +- ROADMAP.md | 1 + VERSION | 2 +- desktop/package.json | 2 +- docs/charter/01-vision-dimensions.md | 3 +- docs/charter/02-coverage-matrix.md | 2 +- docs/charter/07-runtime-license.md | 6 +- examples/web-demo/README.md | 2 +- mobile/package.json | 2 +- runtime/__init__.py | 6 +- runtime/pyproject.toml | 2 +- runtime/tests/test_utils_absentee.py | 185 +++++++ .../testcases_sample.xlsx" | Bin 9924 -> 9923 bytes 19 files changed, 683 insertions(+), 21 deletions(-) create mode 100644 "05-\344\273\243\347\240\201\347\244\272\344\276\213/absentee_scenario_injector.py" create mode 100644 runtime/tests/test_utils_absentee.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index faa2b22..53d4555 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,7 +65,7 @@ repos: # 文件统计校验(防误删 agent/skill/utils) - id: file-count-check name: Agents/Skills/Utils 数量校验 - entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null 
| grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "76" ] || { echo "❌ Utils 数量异常(期 76,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' + entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "77" ] || { echo "❌ Utils 数量异常(期 77,实 $U)"; exit 1; }; echo "✅ 文件统计正确"' language: system pass_filenames: false always_run: true diff --git "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" index 3f166aa..666e0c3 100644 --- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" +++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" @@ -264,7 +264,7 @@ - **32 业务 Skill + 3 元 Skill**(总 35 单元) - 业务:通用 8 + 平台专项 5 + 渗透 7 + 车载 5 + ECC 加固 6 + 探索 1 - 元(子目录):`darwin-skill/` `karpathy-guidelines/` `nuwa-skill/` -- **76 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) +- **77 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1) - **20+ 协议**(HTTP/HTTPS/WS/gRPC/TCP/UDP/GraphQL/SOAP/Modbus/MQTT/SSH/串口/Kafka/RabbitMQ/Jaeger 等) - **9 PRD 格式**(md/txt/pdf/docx/xlsx/zip/img/html/url) - **覆盖率 ~90%**(高度专业合规领域:航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508 等,业务方按需自加) diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" index 0d7ea91..b4ae086 100644 --- 
"a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" @@ -13,7 +13,7 @@ Test-Agent/ ─部署→ your-test-project/ ├── 02-专家定义/ ├── .claude/agents/ ├── 03-技能定义/ ├── .claude/skills/ ├── 04-配置文件/ ├── conftest.py / pytest.ini / .env / .mcp.json -├── 05-代码示例/ ├── utils/(76 个 .py) +├── 05-代码示例/ ├── utils/(77 个 .py) └── 06-CICD集成/ ├── .github/workflows/test.yml + Jenkinsfile ├── src/ ← 你的被测系统源码 └── workspace/ ← 测试产出 @@ -57,7 +57,7 @@ bash Test-Agent/install.sh /path/to/your-test-project 2. clone 模板到临时目录 3. 装 Claude Code(如未装) 4. 创建项目目录结构(含 `测试计划/` 等 13 个 workflow 子目录) -5. 拷贝 16 expert + 32 skill + 76 utils + 配置 + CI/CD 文件 +5. 拷贝 16 expert + 32 skill + 77 utils + 配置 + CI/CD 文件 6. 创建 `.venv` + 装 requirements + playwright chromium 7. 复制 `.env.example` → `.env`(已存在则跳过) diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" index 775b40f..0a50e74 100644 --- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" +++ "b/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" @@ -149,7 +149,7 @@ if (-not (Test-Path "$PROJECT_ROOT\.env")) { Copy-Item "$TEMPLATE_DIR\04-配置文件\.env.example" "$PROJECT_ROOT\.env" -Force } -# 拷贝 utils(76 个 .py + __init__.py) +# 拷贝 utils(77 个 .py + __init__.py) $utils = @( "__init__.py", "api_retry_util.py", "data_factory.py", "data_masking.py", "excel_generator.py", "flaky_detector.py", diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/absentee_scenario_injector.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/absentee_scenario_injector.py" new file mode 100644 index 0000000..f9deb37 --- /dev/null +++ 
"b/05-\344\273\243\347\240\201\347\244\272\344\276\213/absentee_scenario_injector.py" @@ -0,0 +1,471 @@ +# SPDX-License-Identifier: MIT +""" +Absentee Scenario Injector — 缺席者场景注入 (Phase 3.3). + +Force-injects edge user scenarios that scripted testing typically overlooks: +disability, elderly, minor, offline, mental crisis, non-native speakers. + +Integrates with: + - a11y_scanner.py (WCAG 2.1 compliance verification) + - i18n_checker.py (RTL layout, cultural taboos, translation quality) + - testcase-designer expert (exploratory charters, scenario templates) + +Referenced by: 03-用例设计 expert + 02-coverage-matrix Phase 3.3. +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + + +# ═══════════════════════════════════════════════════════════════ +# Scenario definitions — canonical absentee groups +# ═══════════════════════════════════════════════════════════════ + +@dataclass +class Scenario: + id: str + group: str # absentee group + severity: str # "P0" | "P1" | "P2" + title: str + description: str + test_steps: list[str] + expected: str + wcag_refs: list[str] = field(default_factory=list) # WCAG 2.1 SC refs + i18n_tags: list[str] = field(default_factory=list) # RTL, locale, cultural + tags: list[str] = field(default_factory=list) + + +# Canonical absentee groups +ABSENTEE_GROUPS = { + "visual_impairment": { + "label": "视觉障碍", + "description": "Screen reader, color blindness, low vision, blindness", + "p0_count": 3, "p1_count": 4, "p2_count": 2, + }, + "motor_impairment": { + "label": "运动障碍", + "description": "Keyboard-only, switch device, voice control, tremor", + "p0_count": 2, "p1_count": 3, "p2_count": 2, + }, + "hearing_impairment": { + "label": "听觉障碍", + "description": "Captions, transcripts, visual-only alerts", + "p0_count": 2, "p1_count": 2, "p2_count": 2, + }, + 
"cognitive_impairment": { + "label": "认知障碍", + "description": "Simple language, consistent nav, error recovery, dyslexia", + "p0_count": 2, "p1_count": 3, "p2_count": 2, + }, + "elderly": { + "label": "老年用户", + "description": "Large touch targets, high contrast, simplified flows, font scaling", + "p0_count": 2, "p1_count": 3, "p2_count": 2, + }, + "minor": { + "label": "未成年用户", + "description": "Age-gating, COPPA/GDPR-K, content filtering, parental consent", + "p0_count": 2, "p1_count": 2, "p2_count": 2, + }, + "offline_low_bandwidth": { + "label": "离线/弱网", + "description": "Offline-first, sync conflicts, data loss prevention, 2G fallback", + "p0_count": 2, "p1_count": 3, "p2_count": 2, + }, + "mental_crisis": { + "label": "精神危机状态", + "description": "Suicide/self-harm content detection, crisis resource routing, de-escalation UX", + "p0_count": 3, "p1_count": 2, "p2_count": 2, + }, + "non_native_speaker": { + "label": "非母语用户", + "description": "Translation quality, RTL layout, cultural context, regional formats", + "p0_count": 1, "p1_count": 3, "p2_count": 2, + }, +} + + +# ═══════════════════════════════════════════════════════════════ +# Scenario library +# ═══════════════════════════════════════════════════════════════ + +SCENARIOS: list[Scenario] = [ + # ── Visual impairment ── + Scenario("VI-001", "visual_impairment", "P0", + "Screen reader navigates full user journey", + "Verify all interactive elements have accessible names, landmarks are structured, and form errors are announced by screen reader.", + ["Launch screen reader (NVDA/VoiceOver/TalkBack)", "Navigate to login page", "Tab through all form fields — verify each announces label + state", + "Submit empty form — verify error message is read aloud", "Complete login — verify success announcement", + "Navigate main dashboard — verify landmark roles (banner/main/navigation)"], + "All interactive elements reachable and announced. Error messages read on appearance. 
Landmarks correctly identified.", + wcag_refs=["1.1.1", "1.3.1", "4.1.2", "4.1.3"], + tags=["screen-reader", "aria", "landmarks", "forms"]), + Scenario("VI-002", "visual_impairment", "P0", + "Color blindness does not block critical information", + "Verify no information is conveyed by color alone. Status indicators use icons+text, not just red/green.", + ["Enable deuteranopia simulation (Chrome DevTools Rendering tab)", "Navigate to status dashboard", + "Check all status badges — verify each has text label (not just color dot)", + "Check charts — verify patterns or labels distinguish data series", + "Check form validation — verify error fields have icon + border + text, not just red border"], + "All status, validation, and chart information distinguishable without color perception.", + wcag_refs=["1.4.1"], + tags=["color-blindness", "deuteranopia", "charts", "validation"]), + Scenario("VI-003", "visual_impairment", "P0", + "200% zoom does not break layout or hide content", + "Verify page is usable at 200% browser zoom with no horizontal scroll or overlapping content.", + ["Set browser zoom to 200%", "Navigate through all primary pages (login→dashboard→settings→logout)", + "Check no content is clipped or hidden", "Check no horizontal scrollbar appears", + "Verify all CTAs remain clickable"], + "All content visible and functional at 200% zoom. 
No horizontal scroll.", + wcag_refs=["1.4.4"], + tags=["zoom", "responsive", "reflow"]), + + # ── Motor impairment ── + Scenario("MI-001", "motor_impairment", "P0", + "Full keyboard navigation (no mouse)", + "Verify all functionality is operable via keyboard alone with visible focus indicators.", + ["Disconnect mouse", "Tab through entire page — verify focus ring is visible on every interactive element", + "Verify focus order matches visual layout", "Use Enter/Space to activate buttons and links", + "Use Escape to close modals/dropdowns", "Verify no keyboard traps (Tab never gets stuck)"], + "All functionality reachable and operable via keyboard. Visible focus indicator on every element.", + wcag_refs=["2.1.1", "2.1.2", "2.4.3", "2.4.7"], + tags=["keyboard", "focus", "tab-order"]), + Scenario("MI-002", "motor_impairment", "P0", + "Touch targets meet minimum size (44×44 CSS px)", + "Verify all interactive elements have sufficient touch target size per WCAG 2.5.5.", + ["Open page on mobile viewport (375px)", "Identify all tappable elements (buttons, links, inputs)", + "Measure each target — verify ≥44×44 CSS px or has sufficient spacing", + "Check adjacent targets don't overlap", "Test with fat-finger simulation (34px offset)"], + "All touch targets ≥44×44px or have adequate spacing from neighbors.", + wcag_refs=["2.5.5", "2.5.8"], + tags=["touch-target", "mobile", "motor"]), + + # ── Hearing impairment ── + Scenario("HI-001", "hearing_impairment", "P0", + "All video/audio content has captions or transcripts", + "Verify prerecorded media has synchronized captions and audio-only content has transcripts.", + ["Identify all