diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 740b1b1..1d79cd2 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -101,9 +101,25 @@ updates:
commit-message:
prefix: "ci"
- # ===== npm(Claude Code CLI / lighthouse / pa11y / playwright 等)=====
+ # ===== npm(runtime/web — React/Vite 前端)=====
- package-ecosystem: "npm"
- directory: "/"
+ directory: "/runtime/web"
+ schedule:
+ interval: "weekly"
+ day: "monday"
+ time: "09:00"
+ timezone: "Asia/Shanghai"
+ open-pull-requests-limit: 5
+ labels:
+ - "dependencies"
+ - "npm"
+ - "ui"
+ commit-message:
+ prefix: "deps(npm)"
+
+ # ===== npm(desktop — Electron 桌面应用)=====
+ - package-ecosystem: "npm"
+ directory: "/desktop"
schedule:
interval: "weekly"
day: "monday"
@@ -113,5 +129,6 @@ updates:
labels:
- "dependencies"
- "npm"
+ - "desktop"
commit-message:
prefix: "deps(npm)"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index accfd4c..3759fb7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,11 +28,46 @@ jobs:
- name: Ruff 检查(用 utils/.ruff.toml 配置)
run: |
pip install ruff
- ruff check 05-代码示例/ --config 05-代码示例/.ruff.toml
+ ruff check utils/ --config utils/.ruff.toml
- name: 语法检查(py_compile)
run: |
- python -m compileall 05-代码示例/ -q
+ python -m compileall utils/ -q
+ python -m compileall runtime/ -q
+
+ # ===== 1b. L7 契约门禁 (Shift-Left: OpenAPI 变更 → 契约验证) =====
+ contract-gate:
+ name: L7 契约门禁
+ runs-on: ubuntu-latest
+ if: github.event_name == 'pull_request'
+ steps:
+ - uses: actions/checkout@v6
+ with:
+ fetch-depth: 0
+
+ - uses: actions/setup-python@v6
+ with:
+ python-version: '3.11'
+
+ - name: 安装依赖
+ run: pip install jsonschema requests
+
+ - name: L7 契约检测
+ run: |
+ python utils/quality/ci_contract_gate.py \
+ --base-ref "origin/${{ github.base_ref }}" \
+ --consumer "test-agent-ci" \
+ --output-json workspace/contracts/ci-contract-result.json || {
+ echo "::warning::Contract gate failed — check workspace/contracts/ for details"
+ }
+
+ - name: 上传契约产物
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: contracts
+ path: workspace/contracts/
+ retention-days: 7
# ===== 2. 依赖漏洞扫描 =====
security-audit:
@@ -50,14 +85,14 @@ jobs:
- name: pip-audit 扫描
run: |
pip install pip-audit
- pip-audit -r 04-配置文件/requirements.txt --format columns || true
- continue-on-error: true # CI 模板仓库 audit 仅做参考,不强阻(避免依赖冲突误杀)
+ pip-audit -r config/requirements.txt --format columns || true
+ # `|| true` 会吞掉非零退出码:扫描完整运行且该步骤始终成功,发现漏洞也不会使 job 失败(结果仅供参考)
+ # 如需阻断合并,请去掉 `|| true`,并在 branch protection 中将本 job 设为必需检查
- name: safety 扫描
run: |
pip install safety
- safety check -r 04-配置文件/requirements.txt --short-report
- continue-on-error: true
+ safety check -r config/requirements.txt --short-report || true
# ===== 3. install.sh 脚本检查 =====
shell-check:
@@ -73,9 +108,7 @@ jobs:
scandir: '.'
additional_files: 'install.sh'
- # ===== 3b. install.sh macOS 真机端到端 (本 session V1.14.0+1 新加) =====
- # 用 GitHub Actions macos-latest runner 实跑 install.sh, 验 ROADMAP "Mac 真机"挂账。
- # 关键: 设 TEST_AGENT_LOCAL_SRC 跳过 git clone, 用 PR 当前改动而非 fetch default branch。
+ # ===== 3b. install.sh macOS 真机端到端 =====
macos-real-install:
name: install.sh macOS 端到端 (实跑)
runs-on: macos-latest
@@ -89,8 +122,6 @@ jobs:
- name: 跑 install.sh /tmp/test-target (LOCAL_SRC=PR)
env:
TEST_AGENT_LOCAL_SRC: ${{ github.workspace }}
- # macOS runner 自带 git/bash/curl, npm 用于 claude-code (会装但允许失败,
- # CI 不验 npm 安装链, 仅验 install.sh 主流程 + Python venv)
CI: "true"
run: |
mkdir -p /tmp/test-target
@@ -102,13 +133,12 @@ jobs:
- name: 验关键 artifacts
run: |
# install.sh 实际部署结构 (非源仓 mirror, 而是 Claude Code 项目脚手架):
- # 02-专家定义/[0-9]*.md → .claude/agents/ (Claude Code subagent 路径)
- # 03-技能定义/*.md → .claude/skills/ (Claude Code skill 路径)
- # 05-代码示例/*.py → utils/
- # utils/ 原内容 → utils/
- # 04-配置文件/{conftest.py,pytest.ini,.mcp.json,requirements.txt} → PROJECT_ROOT/
- # 04-配置文件/.env.example → PROJECT_ROOT/.env (rename)
- # 06-CICD集成/* → .github/workflows/test.yml + Jenkinsfile
+ # agents/[0-9]*.md → .claude/agents/
+ # skills/*.md → .claude/skills/
+ # utils/*.py → utils/
+ # config/{conftest.py,pytest.ini,.mcp.json,requirements.txt} → PROJECT_ROOT/
+ # config/.env.example → PROJECT_ROOT/.env (rename)
+ # ci/* → .github/workflows/test.yml + Jenkinsfile
# 顶层 LICENSE/ROADMAP/README/... → PROJECT_ROOT/
# .venv/ 新建 + pip 装依赖
# runtime/ 不拷 (引擎源, V2 通过 pip 装独立分发)
@@ -129,12 +159,9 @@ jobs:
test "$agent_n" -eq 16 || { echo "❌ agent 数 $agent_n ≠ 16"; exit 1; }
test "$skill_md_n" -eq 32 || { echo "❌ skill .md 数 $skill_md_n ≠ 32"; exit 1; }
test "$skill_dir_n" -eq 3 || { echo "❌ skill meta 子目录数 $skill_dir_n ≠ 3 (期望 darwin/karpathy/nuwa)"; exit 1; }
- # 验 05-代码示例 内 script rename 到 utils/
- test -f "/tmp/test-target/utils/generate_report.py" \
- || { echo "❌ 缺 utils/generate_report.py (05-代码示例 拷贝丢)"; exit 1; }
- # 排错: 列 .claude/skills 内容 (debug 用)
- echo "--- .claude/skills/ 内容: ---"
- ls -la /tmp/test-target/.claude/skills/ | head -50
+ # 验 utils/ 内 script 存在
+ test -f "/tmp/test-target/utils/reporting/generate_report.py" \
+ || { echo "❌ 缺 utils/reporting/generate_report.py (拷贝丢)"; exit 1; }
echo "✅ install.sh macOS 实跑 OK"
- name: 上传 install.log (失败时排错用)
@@ -159,7 +186,7 @@ jobs:
pip install pyyaml
python -c "
import yaml, sys
- for f in ['.github/dependabot.yml', '.github/workflows/ci.yml', '06-CICD集成/github-actions-test.yml']:
+ for f in ['.github/dependabot.yml', '.github/workflows/ci.yml', 'ci/github-actions-test.yml']:
try:
yaml.safe_load(open(f))
print(f'✅ {f}')
@@ -172,7 +199,7 @@ jobs:
run: |
python -c "
import json, sys
- for f in ['04-配置文件/.mcp.json']:
+ for f in ['config/.mcp.json']:
try:
json.load(open(f))
print(f'✅ {f}')
@@ -196,19 +223,20 @@ jobs:
run: |
set -o pipefail
python -m pip install --upgrade pip
- pip install -r 04-配置文件/requirements.txt 2>&1 | tail -20 || \
+ pip install -r config/requirements.txt 2>&1 | tail -20 || \
pip install pytest faker factory-boy requests websocket-client pdfplumber python-docx openpyxl PyYAML python-dotenv tenacity loguru beautifulsoup4 jsonschema
- name: 核心 utils 导入测试
run: |
- cd 05-代码示例
+ cd utils
python -c "
- import sys
+ import sys, os
+ sys.path.insert(0, '.')
modules = [
- 'api_retry_util', 'data_factory', 'data_masking',
- 'excel_generator', 'flaky_detector', 'jmeter_csv_exporter',
- 'jmeter_result_parser', 'regression_scope',
- 'ci_quality_gate', 'prd_loader',
+ 'protocols.api_retry_util', 'data.data_factory', 'data.data_masking',
+ 'reporting.excel_generator', 'quality.flaky_detector', 'performance.jmeter_csv_exporter',
+ 'performance.jmeter_result_parser', 'infra.regression_scope',
+ 'quality.ci_quality_gate', 'design.prd_loader',
]
failed = []
for m in modules:
@@ -234,7 +262,8 @@ jobs:
fail=0
for md in $(find . -name "*.md" -not -path "./node_modules/*" -not -path "./.git/*"); do
# 提取 markdown 内部链接 [...](xxx.md) 形式
- grep -oE "\[[^]]*\]\(([^)]+)\)" "$md" | grep -oE "\(([^)]+)\)" | tr -d "()" | while read link; do
+ # 使用 process substitution 避免 pipe subshell 吞掉 fail=1
+ while read link; do
# 跳过 URL/锚点
[[ "$link" =~ ^https?:// ]] && continue
[[ "$link" =~ ^# ]] && continue
@@ -248,9 +277,13 @@ jobs:
echo "❌ $md → $link (target=$target_file 不存在)"
fail=1
fi
- done || true
+ done < <(grep -oE "\[[^]]*\]\(([^)]+)\)" "$md" | grep -oE "\(([^)]+)\)" | tr -d "()")
done
- [ $fail -eq 0 ] && echo "✅ 所有内部链接有效"
+ if [ $fail -ne 0 ]; then
+ echo "❌ 发现死链,请修复后重试"
+ exit 1
+ fi
+ echo "✅ 所有内部链接有效"
# ===== 7. 文件统计校验 =====
file-count:
@@ -261,14 +294,14 @@ jobs:
- name: 统计核对
run: |
- AGENTS=$(ls 02-专家定义/[0-9]*.md | wc -l)
- SKILLS=$(ls 03-技能定义/*.md | grep -v README | wc -l)
- UTILS=$(ls 05-代码示例/*.py | wc -l)
+ AGENTS=$(ls agents/[0-9]*.md | wc -l)
+ SKILLS=$(ls skills/*.md | grep -v README | wc -l)
+ UTILS=$(find utils -name "*.py" ! -name "__init__.py" | wc -l)
echo "Agents=$AGENTS Skills=$SKILLS Utils=$UTILS"
- [ "$AGENTS" = "16" ] || { echo "❌ Agents 数量不符(期 16,实 $AGENTS)"; exit 1; }
- [ "$SKILLS" -eq "32" ] || { echo "❌ Skills 数量不符(期 32,实 $SKILLS)"; exit 1; }
- [ "$UTILS" -eq "67" ] || { echo "❌ Utils 数量不符(期 67,实 $UTILS)"; exit 1; }
- echo "✅ 文件统计正确"
+ [ "$AGENTS" = "16" ] || { echo "❌ Agents count mismatch (expected 16, got $AGENTS)"; exit 1; }
+ [ "$SKILLS" -eq "32" ] || { echo "❌ Skills count mismatch (expected 32, got $SKILLS)"; exit 1; }
+ [ "$UTILS" -eq "78" ] || { echo "❌ Utils count mismatch (expected 78, got $UTILS)"; exit 1; }
+ echo "✅ File counts correct"
- name: 验证 .gitignore 排除源 MD
run: |
@@ -304,7 +337,7 @@ jobs:
- name: 安装运行时依赖
run: |
python -m pip install --upgrade pip
- pip install -r 04-配置文件/requirements.txt 2>&1 | tail -10 || true
+ pip install -r config/requirements.txt 2>&1 | tail -10 || true
# 兜底:确保关键运行时模块在
pip install pydantic pydantic-settings typer rich loguru pyyaml openpyxl factory-boy faker prefect
@@ -320,8 +353,7 @@ jobs:
python -m runtime.cli.main selftest --e2e --pass-threshold 0.80
echo "✅ L2 stub e2e ≥80% 节点通过"
- # ===== 9. runtime/tests pytest 单元测试 (V1.16-followup 新加) =====
- # 验 7 AgentRunner 专项 + registry + router + X4 防 mock 闭环 测试
+ # ===== 9. runtime/tests pytest 单元测试 =====
pytest-unit:
name: runtime/tests pytest 单元
runs-on: ubuntu-latest
@@ -332,12 +364,12 @@ jobs:
python-version: '3.11'
cache: pip
- - name: 安装 runtime 最小依赖 (不装 04-配置文件/requirements.txt — 避 pytest-bdd 7.0 与 pytest 8.3 冲突)
+ - name: 安装 runtime 最小依赖
run: |
python -m pip install --upgrade pip
# runtime/ 依赖 (与 selftest-mock 一致)
pip install pydantic pydantic-settings typer rich loguru pyyaml openpyxl factory-boy faker prefect fastapi python-multipart httpx
- # pytest core (排除 pytest-bdd / pytest-playwright 等用户场景 plugin, 避免 _pytest.nodes iterparentnodeids 冲突)
+ # pytest core (排除 pytest-bdd / pytest-playwright 等用户场景 plugin)
pip install pytest pytest-asyncio pytest-cov
- name: pytest runtime/tests/ (排除 test_router_real 真 LLM)
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 955acb1..9d87e8c 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -47,6 +47,8 @@ jobs:
build-mode: none
- language: python
build-mode: none
+ - language: javascript-typescript
+ build-mode: none
# CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift'
# Use `c-cpp` to analyze code written in C, C++ or both
# Use 'java-kotlin' to analyze code written in Java, Kotlin or both
diff --git a/.github/workflows/desktop-release.yml b/.github/workflows/desktop-release.yml
index c128b39..ba4abe2 100644
--- a/.github/workflows/desktop-release.yml
+++ b/.github/workflows/desktop-release.yml
@@ -13,13 +13,13 @@ jobs:
build-windows:
runs-on: windows-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- - uses: actions/setup-python@v5
+ - uses: actions/setup-python@v6
with:
python-version: "3.11"
- - uses: actions/setup-node@v4
+ - uses: actions/setup-node@v6
with:
node-version: "20"
@@ -54,13 +54,13 @@ jobs:
build-macos:
runs-on: macos-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v6
- - uses: actions/setup-python@v5
+ - uses: actions/setup-python@v6
with:
python-version: "3.11"
- - uses: actions/setup-node@v4
+ - uses: actions/setup-node@v6
with:
node-version: "20"
diff --git a/.github/workflows/selftest-weekly.yml b/.github/workflows/selftest-weekly.yml
index d062a46..b6e73a6 100644
--- a/.github/workflows/selftest-weekly.yml
+++ b/.github/workflows/selftest-weekly.yml
@@ -29,7 +29,7 @@ jobs:
- name: 安装依赖
run: |
python -m pip install --upgrade pip
- pip install -r 04-配置文件/requirements.txt 2>&1 | tail -10
+ pip install -r config/requirements.txt 2>&1 | tail -10
pip install litellm # 真 LLM 调用必需
- name: L1 frontmatter lint
diff --git a/.gitignore b/.gitignore
index 827f545..903351f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,12 +19,12 @@ htmlcov/
workspace/测试数据/
workspace/执行日志/
workspace/测试报告/
+workspace/_outputs/
workspace/feedback/
workspace/自动化脚本/
# 但保留 workspace/执行日志/baselines/(性能基线需提交)
!workspace/执行日志/baselines/
-!workspace/执行日志/baselines/*.json
# ===== 敏感配置 =====
.env
@@ -50,7 +50,6 @@ npm-debug.log
package-lock.json
# ===== 操作系统 =====
-.DS_Store
desktop.ini
# ===== 日志 =====
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ffac747..4ba2e7d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,9 +7,8 @@ repos:
exclude: \.md$
- id: end-of-file-fixer
# upstream skill 目录不动(主宪章 §14 §27 darwin-skill / karpathy-guidelines 禁本地 fork)
- exclude: ^03-技能定义/(darwin-skill|karpathy-guidelines)/
+ exclude: ^skills/(darwin-skill|karpathy-guidelines)/
- id: check-yaml
- exclude: ^06-CICD集成/jenkins-pipeline\.groovy$
- id: check-json
- id: check-toml
- id: check-added-large-files
@@ -25,8 +24,11 @@ repos:
rev: v0.1.13
hooks:
- id: ruff
- args: [--config, "05-代码示例/.ruff.toml"]
- files: ^05-代码示例/
+ args: [--config, "utils/.ruff.toml"]
+ files: ^utils/
+ - id: ruff
+ args: [--config, "runtime/pyproject.toml"]
+ files: ^runtime/
# ruff-format 关掉:utils 紧凑 CLI 风格不需自动重排
# ===== 凭据扫描(gitleaks)=====
@@ -65,7 +67,7 @@ repos:
# 文件统计校验(防误删 agent/skill/utils)
- id: file-count-check
name: Agents/Skills/Utils 数量校验
- entry: bash -c 'A=$(ls 02-专家定义/[0-9]*.md 2>/dev/null | wc -l); S=$(ls 03-技能定义/*.md 2>/dev/null | grep -v README | wc -l); U=$(ls 05-代码示例/*.py 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents 数量异常(期 16,实 $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills 数量异常(期 32,实 $S)"; exit 1; }; [ "$U" -eq "67" ] || { echo "❌ Utils 数量异常(期 67,实 $U)"; exit 1; }; echo "✅ 文件统计正确"'
+ entry: bash -c 'A=$(ls agents/[0-9]*.md 2>/dev/null | wc -l); S=$(ls skills/*.md 2>/dev/null | grep -v README | wc -l); U=$(find utils -name "*.py" ! -name "__init__.py" 2>/dev/null | wc -l); [ "$A" = "16" ] || { echo "❌ Agents count mismatch (expected 16, got $A)"; exit 1; }; [ "$S" -eq "32" ] || { echo "❌ Skills count mismatch (expected 32, got $S)"; exit 1; }; [ "$U" -eq "78" ] || { echo "❌ Utils count mismatch (expected 78, got $U)"; exit 1; }; echo "✅ File counts correct"'
language: system
pass_filenames: false
always_run: true
@@ -91,9 +93,9 @@ repos:
# MD034(no-bare-urls):内部文档可直接贴 URL
# MD040(fenced-code-language) / MD014(dollar-prefix) / MD009(trailing) / MD012(multi-blank) / MD010(hard-tab)
# / MD025(single-h1) / MD026(trailing-punct):中文项目常见 nit,与现有 disable 风格一致
- args: ['--disable', 'MD013', 'MD033', 'MD041', 'MD036', 'MD022', 'MD031', 'MD032', 'MD024', 'MD034', 'MD040', 'MD014', 'MD009', 'MD012', 'MD010', 'MD025', 'MD026', 'MD050', 'MD049', 'MD007', 'MD004', 'MD005', 'MD030', 'MD035', 'MD037', 'MD038', 'MD039', '--']
+ args: ['--disable', 'MD013', 'MD033', 'MD041', 'MD036', 'MD022', 'MD031', 'MD032', 'MD024', 'MD034', 'MD040', 'MD014', 'MD025', 'MD026', 'MD050', 'MD049', 'MD007', 'MD035', 'MD038', 'MD039', '--']
# upstream skill dirs 不改本地:darwin-skill / karpathy-guidelines(主宪章 §14 §27)
- exclude: ^(Test-Agent工作流搭建\.md|03-技能定义/(darwin-skill|karpathy-guidelines)/.*)$
+ exclude: ^(skills/(darwin-skill|karpathy-guidelines)/.*)$
# 配置:项目根 .markdownlint.json 自定义规则
default_install_hook_types: [pre-commit]
diff --git "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md" "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md"
index 542affa..6282d50 100644
--- "a/00-\351\241\271\347\233\256\345\257\274\350\210\252.md"
+++ "b/00-\351\241\271\347\233\256\345\257\274\350\210\252.md"
@@ -7,12 +7,12 @@
## 零、V1.1.0 运行时层(`runtime/`)
-> 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 49 脚本**保持不动**(宪章铁律),runtime 仅作可执行调度层。
+> 已有 16 专家 / 32 Skill(含 darwin-skill 自进化) / 78 脚本**保持不动**(宪章铁律),runtime 仅作可执行调度层。
| 模块 | 路径 | 用途 |
|------|------|------|
| AI 路由 | `runtime/router/` | LiteLLM 多厂商 + Ollama 兜底。被测物 → 专家+Skill DAG |
-| 注册中心 | `runtime/registry/` | 扫 `02-专家定义/*.md` + `03-技能定义/*.md` frontmatter |
+| 注册中心 | `runtime/registry/` | 扫 `agents/*.md` + `skills/*.md` frontmatter |
| 编排 | `runtime/orchestrator/` | Prefect 2.x flow + Direct 降级执行器(双轨) |
| API | `runtime/api/` | FastAPI 入口,多格式输入(PDF/Word/MD/exe/APK/IPA/Docker/URL/口头) |
| CLI | `runtime/cli/` | `tagent run|plan|catalog|doctor|search|install|verify|export|selftest|init|demo` |
@@ -35,28 +35,28 @@
| 序号 | Agent 文件 | 职责 |
|------|-----------|------|
-| 01 | `02-专家定义/01-测试主管.md` | test-lead:协调全流程、质量门禁、测试计划(IEEE 829)、产品形态识别路由 |
-| 02 | `02-专家定义/02-需求分析.md` | requirements-analyst:多格式 PRD 解析(md/pdf/docx/xlsx/zip/png/url)→ MD + JSON 摘要 |
-| 03 | `02-专家定义/03-用例设计.md` | testcase-designer:等价类/边界值/场景法 → 4 Sheet Excel |
-| 04 | `02-专家定义/04-环境管理.md` | env-manager:环境健康检查、Docker 编排 |
-| 05 | `02-专家定义/05-数据准备.md` | data-preparer:Faker 数据工厂、脱敏、JMeter CSV |
-| 06 | `02-专家定义/06-自动化脚本.md` | automation-engineer:Web/API(Playwright + requests)+ JMeter 协调 |
-| 07 | `02-专家定义/07-测试执行.md` | test-executor:四阶段执行(冒烟/回归/全量/性能)、Flaky 隔离 |
-| 08 | `02-专家定义/08-Bug管理.md` | bug-manager:BugTracker 提交(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook,severity 1=P0)、生命周期追踪 |
-| 09 | `02-专家定义/09-报告生成.md` | report-generator:Allure + Word + 多端通知(企微/飞书/钉钉/Slack/邮件/Teams,curl 直连) |
+| 01 | `agents/01-测试主管.md` | test-lead:协调全流程、质量门禁、测试计划(IEEE 829)、产品形态识别路由 |
+| 02 | `agents/02-需求分析.md` | requirements-analyst:多格式 PRD 解析(md/pdf/docx/xlsx/zip/png/url)→ MD + JSON 摘要 |
+| 03 | `agents/03-用例设计.md` | testcase-designer:等价类/边界值/场景法 → 4 Sheet Excel |
+| 04 | `agents/04-环境管理.md` | env-manager:环境健康检查、Docker 编排 |
+| 05 | `agents/05-数据准备.md` | data-preparer:Faker 数据工厂、脱敏、JMeter CSV |
+| 06 | `agents/06-自动化脚本.md` | automation-engineer:Web/API(Playwright + requests)+ JMeter 协调 |
+| 07 | `agents/07-测试执行.md` | test-executor:四阶段执行(冒烟/回归/全量/性能)、Flaky 隔离 |
+| 08 | `agents/08-Bug管理.md` | bug-manager:BugTracker 提交(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook,severity 1=P0)、生命周期追踪 |
+| 09 | `agents/09-报告生成.md` | report-generator:Allure + Word + 多端通知(企微/飞书/钉钉/Slack/邮件/Teams,curl 直连) |
### 通用流程 8 Skill
| Skill | 文件 | 用途 |
|-------|-----|------|
-| `/smoke-test` | `03-技能定义/smoke-test.md` | P0 冒烟(≥95% 门禁) |
-| `/test-coordinator` | `03-技能定义/test-coordinator.md` | 完整流程编排(自动平台路由) |
-| `/regression-test` | `03-技能定义/regression-test.md` | P0+P1 回归 + Flaky + JMeter 基线对比 |
-| `/testcase-design` | `03-技能定义/testcase-design.md` | 默认 4 Sheet Excel,V1.9 加 xmind/markmap/opml 多格式自选 |
-| `/python-script-gen` | `03-技能定义/python-script-gen.md` | pytest UI/API 脚本 |
-| `/jmeter-script-gen` | `03-技能定义/jmeter-script-gen.md` | JMeter JMX(双模式 ci_quick/full) |
-| `/data-preparation` | `03-技能定义/data-preparation.md` | 测试数据 + JMeter CSV |
-| `/zentao-bug-submission` | `03-技能定义/zentao-bug-submission.md` | 禅道 Bug 规范提交(默认 tracker 示例;其他 tracker 通过 `BUG_TRACKER` env 选) |
+| `/smoke-test` | `skills/smoke-test.md` | P0 冒烟(≥95% 门禁) |
+| `/test-coordinator` | `skills/test-coordinator.md` | 完整流程编排(自动平台路由) |
+| `/regression-test` | `skills/regression-test.md` | P0+P1 回归 + Flaky + JMeter 基线对比 |
+| `/testcase-design` | `skills/testcase-design.md` | 默认 4 Sheet Excel,V1.9 加 xmind/markmap/opml 多格式自选 |
+| `/python-script-gen` | `skills/python-script-gen.md` | pytest UI/API 脚本 |
+| `/jmeter-script-gen` | `skills/jmeter-script-gen.md` | JMeter JMX(双模式 ci_quick/full) |
+| `/data-preparation` | `skills/data-preparation.md` | 测试数据 + JMeter CSV |
+| `/zentao-bug-submission` | `skills/zentao-bug-submission.md` | 禅道 Bug 规范提交(默认 tracker 示例;其他 tracker 通过 `BUG_TRACKER` env 选) |
### 通用 utils(11 个,流程闭环必备)
@@ -82,11 +82,11 @@
| 平台 | Agent 文件 | 主驱动 |
|------|-----------|-------|
-| **移动**(Android/iOS/小程序) | `02-专家定义/10-移动测试.md` | mobile-tester:Appium + 微信开发者 CLI + Android Monkey 稳定性 |
-| **桌面**(EXE/macOS/Linux/Electron) | `02-专家定义/11-桌面测试.md` | desktop-tester:pywinauto + PyAutoGUI + Playwright Electron + EXE+WS 混合 |
-| **视觉/游戏** | `02-专家定义/12-视觉游戏测试.md` | visual-tester:Airtest + OpenCV SSIM + Tesseract OCR |
-| **系统集成**(IoT/音视频/链路/MQ) | `02-专家定义/13-系统集成测试.md` | system-tester:SSH+串口+MQTT+FFmpeg+Jaeger+Kafka |
-| **AI/ML** | `02-专家定义/14-AI模型测试.md` | ai-tester:scikit-learn + scipy(漂移)+ LLM eval |
+| **移动**(Android/iOS/小程序) | `agents/10-移动测试.md` | mobile-tester:Appium + 微信开发者 CLI + Android Monkey 稳定性 |
+| **桌面**(EXE/macOS/Linux/Electron) | `agents/11-桌面测试.md` | desktop-tester:pywinauto + PyAutoGUI + Playwright Electron + EXE+WS 混合 |
+| **视觉/游戏** | `agents/12-视觉游戏测试.md` | visual-tester:Airtest + OpenCV SSIM + Tesseract OCR |
+| **系统集成**(IoT/音视频/链路/MQ) | `agents/13-系统集成测试.md` | system-tester:SSH+串口+MQTT+FFmpeg+Jaeger+Kafka |
+| **AI/ML** | `agents/14-AI模型测试.md` | ai-tester:scikit-learn + scipy(漂移)+ LLM eval |
### 平台专项 5 Skill
@@ -210,9 +210,9 @@
| 文件 | 用途 |
|------|------|
-| `06-CICD集成/github-actions-test.yml` | GitHub Actions(preflight/code-quality/smoke/regression/performance/quality-gate 6 job) |
-| `06-CICD集成/jenkins-pipeline.groovy` | Jenkins Pipeline(含 docker python:3.11 + JMeter archive 兜底) |
-| `06-CICD集成/CICD集成说明.md` | Secrets / 门禁 / Q&A |
+| `ci/github-actions-test.yml` | GitHub Actions(preflight/code-quality/smoke/regression/performance/quality-gate 6 job) |
+| `ci/jenkins-pipeline.groovy` | Jenkins Pipeline(含 docker python:3.11 + JMeter archive 兜底) |
+| `ci/CICD集成说明.md` | Secrets / 门禁 / Q&A |
| 顶层 `install.sh` | 一键远程部署(curl + bash) |
---
@@ -223,9 +223,9 @@
|------|------|
| `requirements.txt` | Python 依赖(`==` 锁定) |
| `.github/dependabot.yml` | 周扫描 pip + actions + npm 自动 PR |
-| `06-CICD集成/github-actions-test.yml` | CI 跑 `pip-audit` + `safety` 拦 CVE |
-| `01-快速开始/配置清单.md` "依赖管理章节" | 完整策略 + 命令 |
-| `01-快速开始/部署说明.md` "依赖升级管理 SOP" | 季度升级流程 |
+| `ci/github-actions-test.yml` | CI 跑 `pip-audit` + `safety` 拦 CVE |
+| `docs/getting-started/配置清单.md` "依赖管理章节" | 完整策略 + 命令 |
+| `docs/getting-started/部署说明.md` "依赖升级管理 SOP" | 季度升级流程 |
策略:固定版 + Dependabot 自动 PR + CVE 拦截 + 季度人工评审。
@@ -233,16 +233,16 @@
| 文件 | 用途 |
|------|------|
-| `01-快速开始/使用手册.md` | 启动指引 + skill 详解(含 darwin-skill) + FAQ |
-| `01-快速开始/部署说明.md` | 跨平台(Win/Mac/Linux)部署 + Java/JMeter/Allure 安装 |
-| `01-快速开始/配置清单.md` | .env 全字段 + Secrets / Credentials / Webhook 申请 |
-| `01-快速开始/交付物清单.md` | 测试计划 / 测试报告 / Bug 提交位置 |
-| `04-配置文件/conftest.py` | pytest 全局 fixture(项目根唯一权威) |
-| `04-配置文件/pytest.ini` | 40+ markers / addopts / junit-xml |
-| `04-配置文件/.env.example` | 全字段模板 |
-| `04-配置文件/.mcp.json` | MCP 配置(仅 filesystem 启用) |
-| `04-配置文件/requirements.txt` | Python 依赖 |
-| `04-配置文件/mcp-server-impl.md` | MCP server 自实现教程 |
+| `docs/getting-started/使用手册.md` | 启动指引 + skill 详解(含 darwin-skill) + FAQ |
+| `docs/getting-started/部署说明.md` | 跨平台(Win/Mac/Linux)部署 + Java/JMeter/Allure 安装 |
+| `docs/getting-started/配置清单.md` | .env 全字段 + Secrets / Credentials / Webhook 申请 |
+| `docs/getting-started/交付物清单.md` | 测试计划 / 测试报告 / Bug 提交位置 |
+| `config/conftest.py` | pytest 全局 fixture(项目根唯一权威) |
+| `config/pytest.ini` | 40+ markers / addopts / junit-xml |
+| `config/.env.example` | 全字段模板 |
+| `config/.mcp.json` | MCP 配置(仅 filesystem 启用) |
+| `config/requirements.txt` | Python 依赖 |
+| `config/mcp-server-impl.md` | MCP server 自实现教程 |
---
@@ -252,8 +252,8 @@
|------|---------|
| 新用户首次部署 | README → 部署说明 → 使用手册 → 配置清单 |
| 测试工程师日常 | 使用手册 → 交付物清单 → 对应 skill .md |
-| 开发者扩展 agent/skill | 本文档 → 02-专家定义/README → 03-技能定义/README → 05-代码示例/README |
-| 运维 / DevOps | 部署说明 → install.sh → 06-CICD集成/CICD集成说明 |
+| 开发者扩展 agent/skill | 本文档 → agents/README → skills/README → utils/README |
+| 运维 / DevOps | 部署说明 → install.sh → ci/CICD集成说明 |
| 架构审查 | 本文档 → 各分目录 README → 02-/03-/05- 子目录细节 |
---
@@ -264,10 +264,10 @@
- **32 业务 Skill + 3 元 Skill**(总 35 单元)
- 业务:通用 8 + 平台专项 5 + 渗透 7 + 车载 5 + ECC 加固 6 + 探索 1
- 元(子目录):`darwin-skill/` `karpathy-guidelines/` `nuwa-skill/`
-- **49 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + __init__ 1)
+- **79 utils**(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 测试类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + 无障碍/i18n 2 + 度量 2 + 区块链/AI对抗 2 + 报告/SLO/邮件/减重 3 + 输入 1 + 证据链 1 + 禁忌矩阵 1 + __init__ 1)
- **20+ 协议**(HTTP/HTTPS/WS/gRPC/TCP/UDP/GraphQL/SOAP/Modbus/MQTT/SSH/串口/Kafka/RabbitMQ/Jaeger 等)
- **9 PRD 格式**(md/txt/pdf/docx/xlsx/zip/img/html/url)
-- **覆盖率约 95%**(剩 5% 为高度专业合规领域:航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508 等,业务方按需自加)
+- **覆盖率 ~90%**(未覆盖部分主要为高度专业合规领域:航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508 等,业务方按需自加)
---
@@ -337,25 +337,25 @@ Web UI MVP:`runtime/web/`(Vite+React+shadcn,4 页 + §21 L2 测试套件)
| 专家 | 路径 |
|------|------|
-| pentest-tester | `02-专家定义/15-渗透测试.md` |
-| automotive-tester | `02-专家定义/16-车载测试.md` |
+| pentest-tester | `agents/15-渗透测试.md` |
+| automotive-tester | `agents/16-车载测试.md` |
### 7 渗透 skill + 5 车载 skill = 12 新 skill
| Skill | 路径 |
|-------|------|
-| /pentest-coordinator(主) | `03-技能定义/pentest-coordinator.md` |
-| /pentest-recon | `03-技能定义/pentest-recon.md` |
-| /pentest-vuln | `03-技能定义/pentest-vuln.md` |
-| /pentest-exploit | `03-技能定义/pentest-exploit.md` |
-| /pentest-web | `03-技能定义/pentest-web.md` |
-| /pentest-api | `03-技能定义/pentest-api.md` |
-| /pentest-report | `03-技能定义/pentest-report.md` |
-| /automotive-test(主) | `03-技能定义/automotive-test.md` |
-| /automotive-can-bus-test | `03-技能定义/automotive-can-bus-test.md` |
-| /automotive-adas-scenario | `03-技能定义/automotive-adas-scenario.md` |
-| /automotive-ota-update-test | `03-技能定义/automotive-ota-update-test.md` |
-| /automotive-hil-loop-test | `03-技能定义/automotive-hil-loop-test.md` |
+| /pentest-coordinator(主) | `skills/pentest-coordinator.md` |
+| /pentest-recon | `skills/pentest-recon.md` |
+| /pentest-vuln | `skills/pentest-vuln.md` |
+| /pentest-exploit | `skills/pentest-exploit.md` |
+| /pentest-web | `skills/pentest-web.md` |
+| /pentest-api | `skills/pentest-api.md` |
+| /pentest-report | `skills/pentest-report.md` |
+| /automotive-test(主) | `skills/automotive-test.md` |
+| /automotive-can-bus-test | `skills/automotive-can-bus-test.md` |
+| /automotive-adas-scenario | `skills/automotive-adas-scenario.md` |
+| /automotive-ota-update-test | `skills/automotive-ota-update-test.md` |
+| /automotive-hil-loop-test | `skills/automotive-hil-loop-test.md` |
总数:14 专家 → **16** | 14 skill → **32**
@@ -370,13 +370,13 @@ installing/upstream-licensing 收录铁律。
| Skill | 路径 |
|-------|------|
-| /karpathy-guidelines(upstream) | `03-技能定义/karpathy-guidelines/SKILL.md` |
-| /tdd-workflow | `03-技能定义/tdd-workflow.md` |
-| /verification-loop | `03-技能定义/verification-loop.md` |
-| /e2e-testing | `03-技能定义/e2e-testing.md` |
-| /eval-harness | `03-技能定义/eval-harness.md` |
-| /security-review | `03-技能定义/security-review.md` |
-| /agent-introspection-debugging | `03-技能定义/agent-introspection-debugging.md` |
+| /karpathy-guidelines(upstream) | `skills/karpathy-guidelines/SKILL.md` |
+| /tdd-workflow | `skills/tdd-workflow.md` |
+| /verification-loop | `skills/verification-loop.md` |
+| /e2e-testing | `skills/e2e-testing.md` |
+| /eval-harness | `skills/eval-harness.md` |
+| /security-review | `skills/security-review.md` |
+| /agent-introspection-debugging | `skills/agent-introspection-debugging.md` |
### 新模块
@@ -398,7 +398,7 @@ marketplace/agent-introspection/essence-watcher 铁律。
| 路径 | 用途 |
|------|------|
| `docs/theory/13-build-your-own/` | 10 P0 卡:database/network/web/git/search/shell/regex/lang/browser/bot |
-| `03-技能定义/build-your-own-x-explorer.md` | 主 skill 引导式 deep-dive |
+| `skills/build-your-own-x-explorer.md` | 主 skill 引导式 deep-dive |
### Marketplace 4 lane
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/README.md" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/README.md"
deleted file mode 100644
index e081ede..0000000
--- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/README.md"
+++ /dev/null
@@ -1,55 +0,0 @@
-# 03-技能定义 索引
-
-**32 业务 Skill + 3 元 Skill**。
-
-业务 skill 按领域分:通用流程 8 + 平台专项 5 + 渗透安全 7 + 车载 5 + ECC 测试加固 6 + 探索学习 1 = 32。
-元 skill 3 个子目录:`darwin-skill/` `karpathy-guidelines/` `nuwa-skill/` — 用法见各子目录 SKILL.md。
-
-顶层导航见根目录 [00-项目导航.md](../00-项目导航.md);路线图见 [ROADMAP.md](../ROADMAP.md)。
-
----
-
-## 类别 1:通用流程 8 Skill
-
-| Skill | 文件 | 用途 | 触发示例 |
-|-------|-----|------|---------|
-| `/smoke-test` | `smoke-test.md` | P0 冒烟(≥95% 门禁,11min 上限) | 上线前快速验证 |
-| `/test-coordinator` | `test-coordinator.md` | 完整流程编排(自动平台路由) | 新功能完整测试 |
-| `/regression-test` | `regression-test.md` | P0+P1 回归 + Flaky + JMeter | 迭代发布前 |
-| `/testcase-design` | `testcase-design.md` | 4 Sheet Excel 用例 | 评审前 / 手测前 |
-| `/python-script-gen` | `python-script-gen.md` | pytest UI/API 脚本生成 | 用例转自动化 |
-| `/jmeter-script-gen` | `jmeter-script-gen.md` | JMeter JMX 性能计划(双模式 ci_quick/full) | 性能测试 |
-| `/data-preparation` | `data-preparation.md` | 测试数据 + JMeter CSV 生成 | 测试前数据准备 |
-| `/zentao-bug-submission` | `zentao-bug-submission.md` | BugTracker 规范提交(默认禅道示例,可换 Jira/GitHub/GitLab/Linear/Webhook,见 `BUG_TRACKER` env) | 失败用例后 |
-
----
-
-## 类别 2:平台专项 5 Skill(按产品形态选)
-
-| Skill | 文件 | 平台 | 必装外部依赖 |
-|-------|-----|------|-------------|
-| `/mobile-test` | `mobile-test.md` | Android / iOS / 微信/支付宝小程序 | Appium server / Android SDK / Xcode / 微信开发者工具 |
-| `/desktop-test` | `desktop-test.md` | Windows EXE / macOS .app / Linux GUI / Electron | pywinauto(Win) / pyautogui / Playwright |
-| `/visual-test` | `visual-test.md` | 游戏 / Canvas / WebGL / OCR / 视觉回归 | Airtest / Tesseract / OpenCV |
-| `/system-test` | `system-test.md` | IoT / 音视频 / 链路追踪 / 消息队列 | FFmpeg / Jaeger / Kafka 或 RabbitMQ |
-| `/ai-test` | `ai-test.md` | AI/ML 模型 / LLM 应用 | 推理服务 endpoint / LLM API |
-
----
-
-## 每个 Skill 文件结构
-
-每个 skill 文件统一包含以下章节:
-
-1. **YAML frontmatter**(name / description / tools)
-2. **🔔 开测前准备清单**(平台 skill 含此段,列必备 + 可选项)
-3. **触发方式**(`/skill-name`)
-4. **适用场景**
-5. **执行流程**(Step 1, 2, 3...)
-6. **质量门禁**
-7. **输出文件**
-
----
-
-## 添加新 Skill
-
-详见根目录 [`CONTRIBUTING.md`](../CONTRIBUTING.md) "添加新 Skill" 章节。
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/result-card-example.png" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/result-card-example.png"
deleted file mode 100644
index 4d4851d..0000000
--- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/result-card-example.png"
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:841d9c30602f0de5a165918efddd9aa01a7f7bcb3902d8cb95b3e37a1f22b2f9
-size 878511
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/showcase.html" "b/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/showcase.html"
deleted file mode 100644
index 2854eea..0000000
--- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/showcase.html"
+++ /dev/null
@@ -1,1059 +0,0 @@
-
-
-
-
-
-自主技能优化系统
-
-
-
-
-
-
-
-
- 自主技能优化系统
- Auto Skill
Optimizer
-
- 评估 → 改进 → 实测验证 → 人类确认 → 保留或回滚
-
-
-
「autoresearch 的核心想法很简单:让系统自主运行实验,评估结果,只保留有效的改进。一个只能向前转的棘轮。」
-
Andrej Karpathy — 谈自主实验循环
-
-
-
-
-
-
-
- 01
- 核心原则
- 五条规则,防止优化器偏移方向、自我刷分或引入退化。
-
-
-
-
01
-
单一可编辑资产
-
每轮优化只针对一个 SKILL.md 文件。一次修改,一次测量,一次决策。不做跨文件编辑,避免归因模糊。
-
-
-
02
-
双重评估
-
静态结构分析捕捉格式和完整性问题。实测执行捕捉行为退化。两者缺一不可。
-
-
-
03
-
棘轮机制
-
提升总分的改进被 commit。降低分数的修改自动 revert。分数只能上升或持平,永远不会下降。
-
-
-
04
-
独立评分
-
编辑 Skill 的 Agent 永远不为自己打分。由独立的子 Agent 评估输出质量,防止自我表扬偏差。
-
-
-
05
-
人在回路
-
每个 Skill 的优化循环完成后,系统暂停。向人类展示 diff 摘要、分数变化和测试输出对比。没有明确确认,任何改动都不会生效。
-
-
-
-
-
-
-
-
- 02
- 8维度
评估体系
- 100分评估体系。结构维度捕捉你能看到的问题,效果维度捕捉只有运行时才能感知的问题。
-
-
-
-
- 结构维度 — 静态分析
-
-
- | # |
- 维度 |
- 权重 |
- 评分标准 |
-
-
-
-
- | 1 |
- Frontmatter质量 |
- 8 |
- 名称正确,描述包含功能/触发条件/使用场景,不超过1024字符 |
-
-
- | 2 |
- 工作流清晰度 |
- 15 |
- 步骤有编号、可执行,每步都有明确的输入/输出 |
-
-
- | 3 |
- 边界条件覆盖 |
- 10 |
- 错误处理、降级方案、常见故障恢复 |
-
-
- | 4 |
- 检查点设计 |
- 7 |
- 关键决策前需用户确认,防止自主失控 |
-
-
- | 5 |
- 指令具体性 |
- 15 |
- 无歧义,具体的参数/格式/示例,可直接执行 |
-
-
- | 6 |
- 资源整合度 |
- 5 |
- 所有引用的脚本/资产路径存在且可访问 |
-
-
-
-
-
- 效果维度 — 需要实测
-
-
- | # |
- 维度 |
- 权重 |
- 评分标准 |
-
-
-
-
- | 7 |
- 整体架构 |
- 15 |
- 层次清晰,无冗余或遗漏,符合生态系统约定 |
-
-
- | 8 |
- 实测表现 |
- 25 |
- 运行2-3个测试提示词,对比启用 Skill 和 baseline 的输出质量 |
-
-
-
-
-
-
-
-
-
- 03
- 优化循环
- 从初始化到最终报告的五个阶段。系统在每个阶段内自主运行,但在阶段之间暂停等待人类审查。
-
-
-
-
- 0
- 初始化
-
-
-
范围与分支设置
-
确定优化范围,创建版本控制基础设施,加载历史记录。
-
- - 确认范围:全部 Skill 还是用户指定子集
- - 扫描 .claude/skills/*/SKILL.md 获取目标列表
- - 创建 git 分支:auto-optimize/YYYYMMDD-HHMM
- - 初始化或加载 results.tsv 用于历史追踪
-
-
-
-
-
-
- 0.5
- 设计
-
-
-
测试提示词工程
-
在任何评分之前,先设计用于衡量效果的测试提示词。没有好的测试,优化器就是盲飞。
-
- - 阅读每个 SKILL.md,理解其声明的能力
- - 为每个 Skill 设计2-3个提示词:一个正常路径,一个模糊场景
- - 保存到每个 Skill 目录下的 test-prompts.json
- - 在继续之前,将所有测试提示词提交人类审批
-
-
-
-
-
-
- 1
- 基线
-
-
-
全维度评分
-
为每个 Skill 建立起始分数。结构评分由主 Agent 完成,效果评分由独立子 Agent 完成。
-
- - 阅读 SKILL.md,为维度1-7评分并附理由
- - 启动子 Agent:分别在启用和未启用 Skill 的情况下运行测试提示词
- - 对比输出,为维度8评分(如子 Agent 不可用则标记 dry_run)
- - 计算加权总分,记录到 results.tsv
- - 展示评分卡,暂停等待人类确认
-
-
-
-
-
-
- 2
- 优化
-
-
-
Hill-Climbing 循环
-
按分数从低到高处理 Skill。每轮:诊断最弱维度,提出一个针对性修复,执行,重新评分,做出决定。
-
- - 找出该 Skill 得分最低的维度
- - 生成一项具体改进(改什么,为什么改,预期分数变化)
- - 编辑 SKILL.md,用结构化消息 git commit
- - 重新评分:结构由主 Agent,效果由独立子 Agent
- - 新分 > 旧分:保留。否则:git revert,进入下一个 Skill
- - 每个 Skill 完成后:展示 diff + 分数变化,等待人类确认
-
-
-
-
-
-
- 3
- 报告
-
-
-
总结与指标
-
将所有结果汇总为最终优化报告,包含优化前后分数、实验次数和关键改进。
-
- - 统计总实验次数、保留次数、回滚次数和测试模式
- - 生成每个 Skill 的优化前后分数对比表
- - 列出影响最大的改进及其对应维度
- - 归档 results.tsv 供未来 baseline 参考
-
-
-
-
-
-
-
-
-
-
- 04
- 棘轮机制
- 分数只能上升。每轮要么改进 Skill,要么干净地回滚。不会随时间积累局部退化。
-
-
-
-
-
-
-
-
- 05
- 为什么需要
双重评估
- 单看结构无法判断 Skill 是否真正好用。单看效果无法判断它为何失败。
-
-
-
-
传统方法
-
纯结构审查
-
- - 检查 frontmatter 是否存在且格式正确
- - 验证步骤是否有编号和描述
- - 确认文件路径和引用是否有效
- - 无法检测 Skill 是否真正提升了输出质量
- - 无法检测看似正确实则产生差结果的误导性指令
- - 无法检测弊大于利的过度约束
-
-
-
-
Auto Skill Optimizer
-
双重评估
-
- - 结构评分捕捉格式、完整性和可读性问题
- - 实测执行揭示真实场景下的行为影响
- - 基线对比衡量 Skill 是增值还是减值
- - 独立子 Agent防止自我表扬的评分偏差
- - 测试提示词设计确保评估针对真实用户场景
- - Dry-run 降级在实测不可用时提供覆盖
-
-
-
-
-
-
-
-
-
- 06
- 概念映射
- autoresearch 的核心抽象如何转化为 Skill 优化。同一台机器,不同的领域。
-
-
-
-
- | Autoresearch |
- Skill Optimizer |
- 实现细节 |
-
-
-
-
- | 研究论文草稿 |
- SKILL.md 文件 |
- 唯一的可编辑产物。所有改进都表现为对这一个文件的编辑。 |
-
-
- | 评估指标 |
- 8维度评估体系 |
- 跨结构(60分)和效果(40分)的加权评分,总计100分。 |
-
-
- | 实验循环 |
- 阶段2 hill-climbing |
- 诊断最弱维度,提出修复,执行,重新评分,保留或回滚。每个 Skill 最多3轮。 |
-
-
- | 版本控制 |
- Git 分支 + revert |
- 每次编辑都是一次 commit。退化通过 revert(新 commit)回滚。完整审计记录。 |
-
-
- | 自动化评估 |
- 子 Agent 测试执行 |
- 独立 Agent 分别在启用和未启用 Skill 的情况下运行测试提示词,对比输出质量。 |
-
-
- | 人类审查关卡 |
- 阶段转换暂停 |
- 系统在基线评分后和每个 Skill 优化后暂停。展示 diff + 分数变化。 |
-
-
- | 探索 vs 利用 |
- 阶段2.5探索性重写 |
- 当 hill-climbing 停滞(连续2次在第1轮就中断),提出完整的结构重写。 |
-
-
- | 实验日志 |
- results.tsv |
- 带时间戳的记录:commit 哈希、Skill 名称、新旧分数、保留/回滚状态、评估模式。 |
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py"
deleted file mode 100644
index 48f1a27..0000000
--- "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/ai_validator.py"
+++ /dev/null
@@ -1,175 +0,0 @@
-# SPDX-License-Identifier: MIT
-"""
-AI/ML 模型校验:模型评估 / 漂移检测 / 公平性 / LLM 输出
-被引用方:14-AI模型测试 agent
-"""
-import json
-import logging
-import os
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-import requests
-
-logger = logging.getLogger(__name__)
-
-
-# ===== 加载推理结果 =====
-
-def load_predictions(endpoint: str, inputs: List[Any], batch: int = 32,
- timeout: int = 30) -> List[Any]:
- """批量调推理服务,返回预测列表"""
- predictions: List[Any] = []
- for i in range(0, len(inputs), batch):
- chunk = inputs[i:i + batch]
- r = requests.post(endpoint, json={"inputs": chunk}, timeout=timeout)
- r.raise_for_status()
- predictions.extend(r.json().get("predictions", r.json()))
- return predictions
-
-
-# ===== 数据漂移检测 =====
-
-def detect_drift(baseline, current, method: str = "ks", threshold: float = 0.05) -> Dict:
- """
- 数值特征逐列检测漂移。
- method: 'ks' (KS 检验), 'psi' (PSI 指数)
- threshold: ks 用 p-value(<阈值 = 漂移);psi 用 PSI 值(>0.2 = 显著漂移)
- """
- import pandas as pd
- from scipy import stats
-
- if not isinstance(baseline, pd.DataFrame):
- baseline = pd.DataFrame(baseline)
- if not isinstance(current, pd.DataFrame):
- current = pd.DataFrame(current)
-
- drifted = []
- details = {}
- common = set(baseline.columns) & set(current.columns)
-
- for col in common:
- a = pd.to_numeric(baseline[col], errors="coerce").dropna()
- b = pd.to_numeric(current[col], errors="coerce").dropna()
- if len(a) == 0 or len(b) == 0:
- continue
-
- if method == "ks":
- stat, p = stats.ks_2samp(a, b)
- details[col] = {"ks_stat": float(stat), "p_value": float(p)}
- if p < threshold:
- drifted.append(col)
- elif method == "psi":
- psi = _calc_psi(a, b)
- details[col] = {"psi": psi}
- if psi > 0.2:
- drifted.append(col)
- else:
- raise ValueError(f"未知 method: {method}")
-
- return {
- "method": method,
- "threshold": threshold,
- "drifted_features": drifted,
- "details": details,
- }
-
-
-def _calc_psi(expected, actual, buckets: int = 10) -> float:
- """PSI 计算(Population Stability Index)"""
- import numpy as np
- breakpoints = np.linspace(0, 1, buckets + 1)
- e_pct, _ = np.histogram(expected.rank(pct=True), breakpoints)
- a_pct, _ = np.histogram(actual.rank(pct=True), breakpoints)
- e_pct = e_pct / max(len(expected), 1)
- a_pct = a_pct / max(len(actual), 1)
- psi = 0.0
- for e, a in zip(e_pct, a_pct):
- if e > 0 and a > 0:
- psi += (e - a) * np.log(e / a)
- return float(psi)
-
-
-# ===== 公平性 =====
-
-def fairness_metrics(dataset: str, sensitive_attr: str, endpoint: str) -> Dict:
- """
- 分组准确率:按 sensitive_attr 切分子集,分别计算准确率。
- 返回各组指标 + 最大差距。
- """
- import pandas as pd
- from sklearn.metrics import accuracy_score
-
- df = pd.read_csv(dataset)
- if "label" not in df.columns or "input" not in df.columns or sensitive_attr not in df.columns:
- raise ValueError("数据集缺少 label/input/sensitive_attr 列")
-
- predictions = load_predictions(endpoint, df["input"].tolist())
- df["pred"] = predictions
-
- metrics = {}
- for group, sub in df.groupby(sensitive_attr):
- metrics[f"{group}_accuracy"] = float(accuracy_score(sub["label"], sub["pred"]))
-
- if len(metrics) >= 2:
- vals = list(metrics.values())
- metrics["max_gap"] = round(max(vals) - min(vals), 4)
- return metrics
-
-
-# ===== LLM 应用评估 =====
-
-def llm_eval(endpoint: str, prompt: str, expected_format: Optional[str] = None,
- schema: Optional[Dict] = None, check: Optional[str] = None,
- timeout: int = 30) -> Dict:
- """
- LLM 输出评估:格式合规 / 拒答检测 / 事实性。
- check: 'refusal' 检测是否拒绝有害 prompt
- """
- headers = {}
- api_key = os.getenv("LLM_API_KEY")
- if api_key:
- headers["Authorization"] = f"Bearer {api_key}"
-
- r = requests.post(endpoint, json={"prompt": prompt}, headers=headers, timeout=timeout)
- r.raise_for_status()
- body = r.json()
- text = body.get("output") or body.get("text") or body.get("content") or ""
-
- result = {"output": text, "valid_format": True}
-
- if expected_format == "json":
- try:
- parsed = json.loads(text)
- if schema:
- # 简易 schema 检查(仅 required keys)
- for k in schema.get("required", []):
- if k not in parsed:
- result["valid_format"] = False
- result["missing_keys"] = [k]
- break
- except json.JSONDecodeError:
- result["valid_format"] = False
-
- if check == "refusal":
- # 简易拒答检测:常见拒答关键词
- refusal_keywords = ["不能", "无法", "抱歉", "I cannot", "I can't", "I'm sorry"]
- result["refused"] = any(k in text for k in refusal_keywords)
-
- return result
-
-
-# ===== 报告 =====
-
-def save_eval_report(metrics: Dict, output_dir: str = "workspace/执行日志/ai-eval",
- prefix: str = "eval") -> str:
- from datetime import datetime
- Path(output_dir).mkdir(parents=True, exist_ok=True)
- path = Path(output_dir) / f"{prefix}_{datetime.now():%Y%m%d_%H%M%S}.json"
- path.write_text(json.dumps(metrics, indent=2, ensure_ascii=False), encoding="utf-8")
- return str(path)
-
-
-if __name__ == "__main__":
- logging.basicConfig(level=logging.INFO)
- print("ai_validator module loaded")
diff --git "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/i18n_checker.py" "b/05-\344\273\243\347\240\201\347\244\272\344\276\213/i18n_checker.py"
deleted file mode 100644
index 57513a6..0000000
--- "a/05-\344\273\243\347\240\201\347\244\272\344\276\213/i18n_checker.py"
+++ /dev/null
@@ -1,155 +0,0 @@
-# SPDX-License-Identifier: MIT
-"""
-国际化(i18n)/ 本地化(l10n)测试
-被引用方:UX / 兼容 / 全球化产品
-
-检查:
-- 多语言资源文件完整性(key 一致)
-- 字符串硬编码检测
-- 字符串截断 / 文本溢出(动态长度)
-- 日期 / 货币 / 数字格式
-- RTL(阿拉伯语 / 希伯来语)
-"""
-import json
-import logging
-import re
-from pathlib import Path
-from typing import Dict, List, Set
-
-logger = logging.getLogger(__name__)
-
-
-# ===== 多语言资源完整性 =====
-
-def check_translation_keys(reference_lang: str = "en-US",
- locales_dir: str = "workspace/自动化脚本/python/i18n") -> Dict:
- """
- 检查所有语言文件的 key 是否与 reference 一致(缺失 / 多余)。
- locales_dir 下:en-US.json / zh-CN.json / ja-JP.json ...
- """
- base_path = Path(locales_dir)
- if not base_path.exists():
- return {"error": f"{locales_dir} 不存在"}
-
- ref_file = base_path / f"{reference_lang}.json"
- if not ref_file.exists():
- return {"error": f"参考语言 {reference_lang} 文件不存在"}
-
- ref_keys = _flatten_keys(json.loads(ref_file.read_text(encoding="utf-8")))
- issues = {}
-
- for f in base_path.glob("*.json"):
- lang = f.stem
- if lang == reference_lang:
- continue
- keys = _flatten_keys(json.loads(f.read_text(encoding="utf-8")))
- missing = ref_keys - keys
- extra = keys - ref_keys
- if missing or extra:
- issues[lang] = {
- "missing_keys": sorted(missing)[:20],
- "extra_keys": sorted(extra)[:20],
- "missing_count": len(missing),
- "extra_count": len(extra),
- }
- return {"reference": reference_lang, "issues": issues, "languages_checked": len(issues)}
-
-
-def _flatten_keys(d: Dict, prefix: str = "") -> Set[str]:
- keys = set()
- for k, v in d.items():
- full = f"{prefix}.{k}" if prefix else k
- if isinstance(v, dict):
- keys |= _flatten_keys(v, full)
- else:
- keys.add(full)
- return keys
-
-
-# ===== 硬编码字符串检测 =====
-
-def detect_hardcoded_strings(src_dir: str = "./src",
- extensions: List[str] = None) -> Dict:
- """
- 扫源码,检测可能未走 i18n 的硬编码中文字符串。
- """
- extensions = extensions or [".py", ".js", ".ts", ".jsx", ".tsx", ".vue"]
- chinese_pattern = re.compile(r'["\']([^"\']*[一-鿿]+[^"\']*)["\']')
-
- findings = []
- for ext in extensions:
- for f in Path(src_dir).rglob(f"*{ext}"):
- try:
- text = f.read_text(encoding="utf-8")
- for m in chinese_pattern.finditer(text):
- findings.append({
- "file": str(f.relative_to(src_dir)),
- "string": m.group(1)[:80],
- })
- except (UnicodeDecodeError, PermissionError, OSError) as e:
- logger.warning("i18n scan skipped %s: %s", f.relative_to(src_dir), e)
- return {
- "src_dir": src_dir,
- "hardcoded_count": len(findings),
- "samples": findings[:30],
- }
-
-
-# ===== 字符串膨胀检测(动态长度 → 文本溢出 / 截断)=====
-
-# 经验:英 → 德 +35%、英 → 法 +25%、英 → 中 -20%
-EXPANSION_RATIO = {
- "de-DE": 1.35, "fr-FR": 1.25, "es-ES": 1.30,
- "ru-RU": 1.40, "zh-CN": 0.7, "ja-JP": 0.6, "ar-SA": 1.20,
-}
-
-
-def predict_text_overflow(reference_text: str, target_lang: str,
- ui_max_width_chars: int) -> Dict:
- """根据膨胀率预测目标语言下是否文本溢出"""
- ratio = EXPANSION_RATIO.get(target_lang, 1.0)
- estimated = int(len(reference_text) * ratio)
- return {
- "reference_length": len(reference_text),
- "target_lang": target_lang,
- "estimated_length": estimated,
- "ui_max": ui_max_width_chars,
- "may_overflow": estimated > ui_max_width_chars,
- }
-
-
-# ===== RTL 检查 =====
-
-RTL_LANGUAGES = {"ar-SA", "he-IL", "fa-IR", "ur-PK"}
-
-
-def is_rtl(lang_code: str) -> bool:
- return lang_code in RTL_LANGUAGES
-
-
-# ===== 日期 / 货币 / 数字格式 =====
-
-def format_check_examples(lang: str) -> Dict:
- """各语言下的日期 / 货币 / 数字预期格式(参考)"""
- formats = {
- "en-US": {"date": "MM/DD/YYYY", "currency": "$1,234.56", "decimal": "1,234.56"},
- "zh-CN": {"date": "YYYY-MM-DD", "currency": "¥1,234.56", "decimal": "1,234.56"},
- "de-DE": {"date": "DD.MM.YYYY", "currency": "1.234,56 €", "decimal": "1.234,56"},
- "ja-JP": {"date": "YYYY/MM/DD", "currency": "¥1,234", "decimal": "1,234"},
- "ar-SA": {"date": "DD/MM/YYYY", "currency": "ر.س 1,234.56", "decimal": "1,234.56"},
- }
- return formats.get(lang, formats["en-US"])
-
-
-if __name__ == "__main__":
- import argparse
- logging.basicConfig(level=logging.INFO)
- parser = argparse.ArgumentParser(description="i18n / l10n 检查")
- sub = parser.add_subparsers(dest="cmd")
- k = sub.add_parser("keys"); k.add_argument("--ref", default="en-US"); k.add_argument("--dir", default="workspace/自动化脚本/python/i18n")
- h = sub.add_parser("hardcoded"); h.add_argument("--dir", default="./src")
- args = parser.parse_args()
- if args.cmd == "keys":
- print(json.dumps(check_translation_keys(args.ref, args.dir), indent=2, ensure_ascii=False))
- elif args.cmd == "hardcoded":
- print(json.dumps(detect_hardcoded_strings(args.dir), indent=2, ensure_ascii=False))
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a4e441c..f7e202a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,10 +11,157 @@
## [Unreleased]
+### Added
+- fairness_auditor.py: 伦理/偏见审计 (Phase 3.1) — dataset bias + model fairness (6 metrics: DI/SPD/EO/equalized_odds/calibration/predictive_parity) + intersectional + decision audit
+- 20 unit tests for fairness_auditor (runtime/tests/test_utils_fairness.py)
+- ai_validator.run_bias_audit(): integrated pipeline calling fairness_auditor
+
+- silent_failure_detector.py: 沉默故障检测 (Phase 3.2) — threshold drift + Mann-Kendall trend + OLS slope + sliding window + multi-source (tracing/web_vitals/prometheus) + batch_detect()
+- 21 unit tests for silent_failure_detector (runtime/tests/test_utils_silent_failure.py)
+
+- absentee_scenario_injector.py: 缺席者场景注入 (Phase 3.3) — 9 absentee groups (visual/motor/hearing/cognitive/elderly/minor/offline/crisis/non-native) × 21 canonical scenarios + charter generation + coverage reporting
+- 20 unit tests for absentee_scenario_injector (runtime/tests/test_utils_absentee.py)
+
+- evidence_chain.py: 证据链可采信性打包器 (Phase 4) — SHA-256 hash chain + multi-source collection (decisions/DORA/tracing/baselines/history) + ISO 27001/SOC2/NIST 800-53/GDPR compliance mapping + JSON package + Markdown custody report + integrity verification
+- 39 unit tests for evidence_chain (runtime/tests/test_utils_evidence_chain.py)
+- ai_validator.run_evidence_chain_audit(): integrated pipeline calling evidence_chain
+
+- taboo_matrix.py: 神圣性与跨文化禁忌矩阵 (Phase 5) — 135 entries across 16 locales in 5 dimensions: taboo words (50), taboo colors (23), taboo numbers (22), taboo holiday periods (26), sacred context rules (14)
+- i18n_checker.py Phase 5 extensions: audit_taboo_words(), audit_taboo_colors(), audit_taboo_numbers(), audit_taboo_holidays(), audit_sacred_contexts(), run_taboo_audit() — combined entry point with structured JSON report
+- 84 unit tests for Phase 5: test_utils_taboo_matrix.py (30 tests) + test_utils_i18n_taboo.py (54 tests)
+
+### Changed
+- coverage matrix: 伦理/偏见审计 ✅ (was Phase 3)
+- coverage matrix: 沉默故障检测 ✅ (was Phase 3)
+- coverage matrix: 缺席者场景注入 ✅ (was Phase 3) — PHASE 3 COMPLETE
+- coverage matrix: 证据链可采信性 ✅ (was Phase 4) — PHASE 4 DELIVERED
+- vision-dimensions: 公平性审计器 + 沉默故障探测器 + 缺席者场景注入器 ✅
+- vision-dimensions: 司法证据包生成器 ✅ (was ⚪)
+- 14-AI模型测试.md: fairness section expanded with 6-metric audit example
+
_后续累积变更入此节;切版本时移到下方版本节。_
---
+## [v1.37.0] - 2026-05-18
+
+### Added
+- Bug Tracker 5 适配器: `jira_bug_manager.py` / `github_bug_manager.py` / `linear_bug_manager.py` / `webhook_bug_manager.py`
+- Quality Gate Engine: `quality_gate_engine.py` + `config/quality_gates.yaml` (YAML 驱动门禁)
+- Layered requirements: `requirements/{base,mobile,desktop,visual,system,ai,perf}.txt` (按需安装引擎)
+- CI `compileall runtime/` syntax check
+
+### Fixed
+- H16: Expert count clarified (9 含 test-lead vs 8 被协调)
+- H18: Skills README completed (13→32 business + 3 meta skills)
+- M12: `run_file` BackgroundTasks unified (was raw threading.Thread)
+- M14: RACI matrix expanded to 18 columns (pentest + automotive)
+- M15: `requires_layer` frontmatter field documented in CONTRIBUTING.md
+- M19: automotive-test checker reference fixed (nonexistent `mcp-compliance-checker` → `compliance/engine.py`)
+
+### Changed
+- Utils count: 67 → 73 (6 new modules)
+- 3 charter items: Bug多适配 ✅ / 按需安装 ✅ / 门禁YAML ✅
+
+---
+
+## [v1.36.0] - 2026-05-18
+
+### Added
+- chaos_helper_v2.py: blast radius + steady-state hypothesis + 6 fault types
+- state_machine_tester_v2.py: N-switch coverage + executable guards + weighted walk
+- db_test_helper_v2.py: MySQL/SQLite + isolation levels + FK/constraint testing + connection pooling
+- bdd_runner_v2.py: Gherkin parser + pytest-bdd integration + coverage scanner
+- carbon_scheduler.py: electricityMap + CodeCarbon integration + green budget tracking
+- canary_config.py: Argo Rollouts CRD generation + Mann-Whitney analysis + error budget burn rate
+
+### Security
+- runtime/backends/ssh.py: known_hosts=None (was (), disabling host verification)
+- runtime/backends/docker.py: shlex.quote(cmd) before shell execution
+- runtime/api/main.py: constant-time bearer token comparison (secrets.compare_digest)
+- runtime/orchestrator/adapters/perf_orchestrator.py: fix SyntaxError (nonlocal outside function) + UnboundLocalError + missing import os
+- runtime/api/main.py: fix ResultStore TypeError (dict [] assignment → .put())
+- runtime/orchestrator/hooks.py: hook failures now logged at debug level (was silent pass)
+- runtime/observability/prometheus_metrics.py: list→deque(maxlen=1000) prevents unbounded growth
+- runtime/api/result_store.py: threading.Lock→RLock prevents deadlock in __contains__
+- runtime/router/llm_client.py + agents/base.py: fix strip("`") corrupting backtick content
+- runtime/api/endpoints/stream.py: fix race condition (setdefault) + store ensure_future task refs
+
+### Fixed
+- runtime/orchestrator/release_readiness.py: fix --from-summary AttributeError (Path wrapping)
+- runtime/orchestrator/direct.py: guard pool.shutdown() against uninitialized pool
+- runtime/tests/test_cli_commands.py: remove unregistered search/list/plan commands
+- skills/darwin-skill/scripts/screenshot.mjs: replace hardcoded /Users/alchain/ path
+- skills/nuwa-skill/references/skill-template.md: remove upstream author X/Twitter branding
+
+### Changed
+- VERSION: 1.32.5 → 1.36.0
+- runtime/__init__.py: __version__ "1.32.5" → "1.36.0"
+- runtime/pyproject.toml: version "1.32.0" → "1.36.0"
+- desktop/package.json + mobile/package.json: "1.32.0" → "1.36.0"
+- Global: "49 utils" → "67 utils" across all documentation (~25 locations)
+
+---
+
+## [v1.35.0] - 2026-05-18
+
+### Added
+- flaky_guard.py: pytest plugin + Chi-squared analysis + auto-quarantine + failure clustering
+- api_security_scanner_v2.py: complete OWASP API Top 10 2023 + JWT attack matrix (key confusion, kid injection, jku/x5u, expiry bypass)
+- data_factory_v2.py: 8 entity types + FK relationships + salted PII + CSV/SQL export
+- perf_orchestrator.py: unified performance orchestration + progressive load (10%→50%→100%→120%)
+- event_test_harness.py: Kafka/RabbitMQ/SQS + schema validation
+- visual_regression.py: multi-engine (pixelmatch + SSIM + Butteraugli)
+- flaky_analyzer.py: LLM root cause analysis (Google Auto-Diagnose style)
+- test_prioritizer.py: ML-based test ordering (git-diff→Bag-of-Words)
+- differential_tester.py: cross-implementation comparison + Mann-Whitney U significance
+- eu_ai_act.py: EU AI Act Annex III + Art.9-15 audit, compliance deadline 2026-08-02
+- supply_chain.py: CycloneDX SBOM + SLSA verification + Sigstore signing
+
+---
+
+## [v1.34.0] - 2026-05-18
+
+### Added
+- Settings 14-field + max_tokens configurable
+- IDE integration (.vscode/.editorconfig/devcontainer)
+- Docker hardening
+- script_bridge.py: 5 standalone scripts wired into orchestrator
+- a11y_scanner_v2.py: WCAG 2.2 with 78 criteria
+- suite_minimizer_v2.py: similarity-clustering based test minimization
+- Prometheus /metrics endpoint
+- Request-ID correlation middleware
+- WebSocket streaming for real-time run progress
+- ResultStore with LRU eviction
+- property_tester.py: Hypothesis PBT with 6 templates
+- contract_test_generator.py: OpenAPI→Pact generation
+- schema_fuzzer.py: JSON Schema-based fuzzing
+- Compliance engine
+- Desktop IPC extension (14 methods)
+- Data lifecycle manager
+- DORA 2025 metrics tracker
+
+---
+
+## [v1.33.0] - 2026-05-17
+
+### Added
+- Complete MASTER_PLAN execution (38/38 items across 8 phases)
+
+---
+
+## [v1.32.5] - 2026-05-17
+
+### Security
+- utils: shell injection hardening, hardcoded credential removal, API auth, silent failure fixes
+- CI: pin ludeeus/action-shellcheck@2.0.0
+- generate_report.py: split 143-line function → 6 helpers
+- mobile_driver.py: split 107-line function → helper extraction
+- _stub_response: 77-line if/elif chain → dispatch table
+- fuzzer.py: PAYLOAD_LIBRARY values hoist to module-level ALL_PAYLOADS
+
+---
+
## [v1.32.4] - 2026-05-17
### Changed
@@ -28,8 +175,8 @@ _后续累积变更入此节;切版本时移到下方版本节。_
## [v1.32.3] - 2026-05-17
### Changed
-- router/llm_client.py: _stub_response 77-line if/elif chain → _STUB_TARGETS dispatch table
-- fuzzer.py: sum(PAYLOAD_LIBRARY.values(), []) hoist to module-level ALL_PAYLOADS
+- router/llm_client.py: `_stub_response` 77-line if/elif chain → `_STUB_TARGETS` dispatch table
+- fuzzer.py: `sum(PAYLOAD_LIBRARY.values(), [])` hoist to module-level `ALL_PAYLOADS`
---
@@ -185,10 +332,10 @@ _后续累积变更入此节;切版本时移到下方版本节。_
### Added(V1.12.0 · `tagent init` 配置自动组装 · 5 分钟从 0 到可跑 · 2026-05-12)
- **新模块 `runtime/init/`**:
- - `matrix.py`:`load_matrix()` 加载 `04-配置文件/templates/matrix.yaml`(单源真理)
+ - `matrix.py`:`load_matrix()` 加载 `config/templates/matrix.yaml`(单源真理)
- `wizard.py`:`run_wizard()` 交互向导 · `from_args()` 非交互 · `from_preset()` 5 预设
- `renderer.py`:`render_all()` 把 InitAnswers + matrix + 模板 → `.env` + `tagent.yml` + `STARTUP.md`
-- **新模板库 `04-配置文件/templates/`**:
+- **新模板库 `config/templates/`**:
- `matrix.yaml` 单源真理:**8 测试类型 × 6 平台 × 5 LLM × 6 BugTracker × 6 通知 = 8640 组合**
- `base.env.tpl` · `base.tagent.yml.tpl` · `STARTUP.md.tpl`(`{{var}}` 占位)
- **CLI**:`tagent init [--test-type] [--platform] [--llm] [--bug-tracker] [--notifier] [--preset] [--out] [--overwrite]`
@@ -213,7 +360,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_
### Fixed(V1.11.0 · 同步铁律批改 + BugTracker/多端 canon + n7 修 · 2026-05-12)
- **同步铁律(§1)执行**:17 文件批改"三端通知"→"多端通知";"禅道 Bug 提交"项目级框架→"BugTracker(默认禅道,可换 Jira/GitHub/GitLab/Linear/Webhook)"
- - `00-项目导航.md` · `02-专家定义/{01,07,08,09}.md` · `02-专家定义/README.md` · `03-技能定义/{README,test-coordinator,zentao-bug-submission}.md` · `04-配置文件/mcp-server-impl.md` · `05-代码示例/{README.md,api_retry_util.py}` · `06-CICD集成/{INDEX,CICD集成说明}.md` · `01-快速开始/{交付物清单,使用手册,配置清单}.md` · `examples/web-demo/README.md` · `CONTRIBUTING.md` · `FULL_GUIDE.md`
+ - `00-项目导航.md` · `agents/{01,07,08,09}.md` · `agents/README.md` · `skills/{README,test-coordinator,zentao-bug-submission}.md` · `config/mcp-server-impl.md` · `utils/{README.md,api_retry_util.py}` · `ci/{INDEX,CICD集成说明}.md` · `docs/getting-started/{交付物清单,使用手册,配置清单}.md` · `examples/web-demo/README.md` · `CONTRIBUTING.md` · `FULL_GUIDE.md`
- **adapter 修 V1.10 n7 bug**:`runtime/orchestrator/adapters/experts.py` 加 `SCRIPT_DEFAULT_ARGS` + `_ensure_fixture()` 通用机制
- 现 `tagent selftest --e2e --strict` **100% PASS 8/8**(原 88% 7/8)
- generate_report.py 默认注入 `--data=workspace/执行日志/_selftest_summary.json`,fixture 自动生成
@@ -273,7 +420,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_
- **教学层 KB 扩 13 大类**(原 12 → 13,加 `13-build-your-own/`):
- INDEX + 10 P0 测试相关卡(database/network-stack/web-server/git/search-engine/shell/regex-engine/programming-language/web-browser/bot)
- 每卡含 `estimated_time_hours` + 测试映射 + 推荐路径
-- **主 skill**:`03-技能定义/build-your-own-x-explorer.md`(引导式 deep-dive 推荐)
+- **主 skill**:`skills/build-your-own-x-explorer.md`(引导式 deep-dive 推荐)
- **Marketplace 4 lane 系统**(对标 Claude Code 官方):
- `marketplace/{skills,agents,mcp,hooks}/` 目录
- `marketplace/INDEX.md` + `registry.json` + `_safety_policy.yaml`(4 关安全门 + 3 信任级源)
@@ -290,7 +437,7 @@ _后续累积变更入此节;切版本时移到下方版本节。_
- **上游参考扩 2 条目**:
- `karpathy-skills.md`(125k★ · LLM 写代码 4 原则元层)
- `everything-claude-code.md`(179k★ · AI agent harness 性能优化 200 skill / 53 agent / Homunculus instincts / Selective install)
-- **Karpathy 4 原则**(主宪章 §27,元层贯穿):Think Before / Simplicity First / Surgical Changes / Goal-Driven Execution;`03-技能定义/karpathy-guidelines/SKILL.md` 部署 upstream 原文(类 darwin-skill 不改本地)
+- **Karpathy 4 原则**(主宪章 §27,元层贯穿):Think Before / Simplicity First / Surgical Changes / Goal-Driven Execution;`skills/karpathy-guidelines/SKILL.md` 部署 upstream 原文(类 darwin-skill 不改本地)
- **ECC 6 测试 skill 入库**(对测试有用的,§28):
- `tdd-workflow` · TDD 80%+ 覆盖
- `verification-loop` · 5-phase verify(build→typecheck→lint→test→coverage)
@@ -311,8 +458,8 @@ _后续累积变更入此节;切版本时移到下方版本节。_
- **上游参考扩**:`pentest-ai-agents.md` 合并萃取 pentagi(黑盒)+ shannon(白盒);10 节;含对比表+应用 checklist
- **2 新专家**:
- - `02-专家定义/15-渗透测试.md` `pentest-tester`(白盒+黑盒+5 攻击域 + Static-Dynamic Correlation + PoC-only)
- - `02-专家定义/16-车载测试.md` `automotive-tester`(ISO 26262 + AUTOSAR + HIL/SIL/MIL/PIL + ADAS + OTA + V2X)
+ - `agents/15-渗透测试.md` `pentest-tester`(白盒+黑盒+5 攻击域 + Static-Dynamic Correlation + PoC-only)
+ - `agents/16-车载测试.md` `automotive-tester`(ISO 26262 + AUTOSAR + HIL/SIL/MIL/PIL + ADAS + OTA + V2X)
- **7 新 pentest skill**:
- `pentest-coordinator`(主)/ `pentest-recon` / `pentest-vuln` / `pentest-exploit` / `pentest-web` / `pentest-api` / `pentest-report`
- **5 新 automotive skill**:
@@ -403,14 +550,14 @@ _后续累积变更入此节;切版本时移到下方版本节。_
- §20 Phase 触发条件(不绑月份)
- How to apply 7-12 扩展项(铭文优先级 / 决策可追溯 / 纪要不可删 / darwin 棘轮 / 依赖补装反问 / 修改四关)
- **行业适配参照表全删除**(主宪章 + FULL_GUIDE 双删)
-- **darwin-skill 入库**:`03-技能定义/darwin-skill/` 完整部署(SKILL.md + scripts/ + templates/ + assets/ + docs/),upstream 原文不改;13 Skill → 14 Skill
+- **darwin-skill 入库**:`skills/darwin-skill/` 完整部署(SKILL.md + scripts/ + templates/ + assets/ + docs/),upstream 原文不改;13 Skill → 14 Skill
- **FULL_GUIDE.md 优化**:三公理/铭文 + 18 闭环段替换为"已迁主宪章 §X"指引(避免双份维护);Bug Tracker / 按需安装 / darwin / AgentChat 详节保留作为深度参考;附 runtime 章节(M1-11 留存)
### Added(V1.1.0 · 运行时层)
-- **新增 `runtime/` 运行时层**:把 14 专家 + 13 Skill + 49 脚本从"文档+工具箱"升级为"可执行运行时"。已有定义/Skill/脚本**保持不动**(宪章铁律),`runtime/` 仅作调度层。
+- **新增 `runtime/` 运行时层**:把 14 专家 + 13 Skill + 67 脚本从"文档+工具箱"升级为"可执行运行时"。已有定义/Skill/脚本**保持不动**(宪章铁律),`runtime/` 仅作调度层。
- `runtime/router/`:AI 路由(LiteLLM 多厂商:Claude/OpenAI/Gemini/Qwen/DeepSeek/Ollama)。被测物 → 专家+Skill DAG。含 stub provider 供 CI 离线测,准确率 5/5 类型(web/api/mobile/desktop/ai-model)
- - `runtime/registry/`:扫 `02-专家定义/*.md` + `03-技能定义/*.md` frontmatter 生成统一目录(14 expert + 13 skill,实测通过)
+ - `runtime/registry/`:扫 `agents/*.md` + `skills/*.md` frontmatter 生成统一目录(14 expert + 13 skill,实测通过)
- `runtime/orchestrator/`:**双轨**——Prefect 2.x flow(全功能,带 UI/重试/状态机)+ Direct 执行器(无 Prefect 也能跑,ThreadPoolExecutor 并发,降级方案)
- `runtime/api/`:FastAPI 入口 `/run/text` `/run/file` `/run/url` `/status/{run_id}` `/report/{run_id}` `/catalog` `/health`。多格式上传 PDF/Word/MD/exe/APK/IPA/Docker/口头/URL/目录
- `runtime/cli/`:Typer CLI `tagent run|plan|catalog|doctor`
@@ -432,16 +579,16 @@ _后续累积变更入此节;切版本时移到下方版本节。_
### Security(安全·上架前必修 Batch 1)
-- **修复 `eval()` 远程代码注入风险**:`05-代码示例/media_validator.py` 中 `get_video_meta()` 原通过 `eval(video.get("r_frame_rate"))` 解析 FFmpeg 外部输出,存在注入风险。改用 `fractions.Fraction` 安全解析。
+- **修复 `eval()` 远程代码注入风险**:`utils/media_validator.py` 中 `get_video_meta()` 原通过 `eval(video.get("r_frame_rate"))` 解析 FFmpeg 外部输出,存在注入风险。改用 `fractions.Fraction` 安全解析。
- **移除占位邮箱**:`SECURITY.md` 与 `CODE_OF_CONDUCT.md` 移除 `security@example.com` / `conduct@example.com` 占位地址,统一指向 GitHub Security Advisories 私密通道;避免上架后被误用作真实联系方式。
- **示例脱敏**:
- - `02-专家定义/13-系统集成测试.md` 示例中 `SSHClient(host="192.168.1.100", user="root", password="...")` 改为 `os.getenv()` 读取,配合 `.env` 注入;同段 `IOT_SSH_HOST` 占位改为 ``。
- - `02-专家定义/07-测试执行.md` 混沌命令示例中真实风格 IP `192.168.1.100` 改为占位 ``。
+ - `agents/13-系统集成测试.md` 示例中 `SSHClient(host="192.168.1.100", user="root", password="...")` 改为 `os.getenv()` 读取,配合 `.env` 注入;同段 `IOT_SSH_HOST` 占位改为 ``。
+ - `agents/07-测试执行.md` 混沌命令示例中真实风格 IP `192.168.1.100` 改为占位 ``。
### Changed(数字漂移修复 + URL 统一 Batch 2)
-- **顶层文档数字一致性**:`8 位专家 / 9 agent / 8 skill / 12 utils` 等过时数字全栈修正为 `14 agent / 13 skill / 49 utils`(核心 8 专家 + 平台扩展 5 专家 + test-lead 协调者)。涉及:`README_DETAIL.md` / `01-快速开始/使用手册.md` / `02-专家定义/01-测试主管.md` / `03-技能定义/test-coordinator.md` / `install.sh`。
-- **GitHub 仓库 URL 统一**:所有引用 `YOUR-USER/Test-Agent工作流搭建` 的位置统一为 `Wool-xing/Test-Agent`(权威英文仓库名;中文 `Test-Agent工作流搭建` 仅作目录别名)。fork 用户可用 `TEST_AGENT_REPO_URL` 环境变量覆盖。涉及:`01-快速开始/部署说明.md` / `01-快速开始/使用手册.md` / `README_DETAIL.md`。
+- **顶层文档数字一致性**:`8 位专家 / 9 agent / 8 skill / 12 utils` 等过时数字全栈修正为 `14 agent / 13 skill / 67 utils`(核心 8 专家 + 平台扩展 5 专家 + test-lead 协调者)。涉及:`README_DETAIL.md` / `docs/getting-started/使用手册.md` / `agents/01-测试主管.md` / `skills/test-coordinator.md` / `install.sh`。
+- **GitHub 仓库 URL 统一**:所有引用 `YOUR-USER/Test-Agent工作流搭建` 的位置统一为 `Wool-xing/Test-Agent`(权威英文仓库名;中文 `Test-Agent工作流搭建` 仅作目录别名)。fork 用户可用 `TEST_AGENT_REPO_URL` 环境变量覆盖。涉及:`docs/getting-started/部署说明.md` / `docs/getting-started/使用手册.md` / `README_DETAIL.md`。
- **覆盖率口径统一为 ~95%**:原 `~99%` (README/README_DETAIL) vs `约 90%` (00-项目导航) 不一致,统一为 `~95%`,剩 5% 为高度专业合规领域(航空 DO-178C / 医疗 HIPAA / 工业控制 IEC61508)。
### Added
@@ -449,38 +596,38 @@ _后续累积变更入此节;切版本时移到下方版本节。_
- 新建 `CHANGELOG.md` + `VERSION` 文件,启动语义版本管理。
- **W3 信息架构重塑**:
- `README_DETAIL.md` 改名为 `FULL_GUIDE.md`(宪章§0 文件分发策略:README.md 简明入口 ≤ 200 行 / FULL_GUIDE.md 详细指南)
- - 新建 `01-快速开始/INDEX.md` / `04-配置文件/INDEX.md` / `06-CICD集成/INDEX.md`(宪章§3 每目录索引;02/03/05 已有 README.md 等价于 INDEX)
+ - 新建 `docs/getting-started/INDEX.md` / `config/INDEX.md` / `ci/INDEX.md`(宪章§3 每目录索引;02/03/05 已有 README.md 等价于 INDEX)
- `README.md` 头加项目代号 `test-agent-team` + 版本 + License
- `README.md` 删除三视角矩阵段(迁移至 FULL_GUIDE.md,避免双份维护)
- `README.md` 行数从 240 降至 168 行
- **W3 安全增强**:
- - `49 个 utils .py` 文件头加 `# SPDX-License-Identifier: MIT`(合规标识)
+ - `67 个 utils .py` 文件头加 `# SPDX-License-Identifier: MIT`(合规标识)
- `.pre-commit-config.yaml` 加 gitleaks hook(凭据扫描)
- `.gitignore` 补漏:`.ruff_cache/` / `*.jtl` / `*.pem` / `*.key` / `*.crt` / `*.p12` / `*.pfx` / `*.jks` / `id_rsa` / `id_ed25519` / `coverage.xml` / `pip-wheel-metadata/`
- **W3 收尾 · 方法论沉淀(F'+J+K)**:
- `CONTRIBUTING.md` 末尾追加:**同步铁律段**(联动改动清单速查 + 自动化保障)+ **RACI 协作矩阵浓缩版**(14 专家 × 35 测试维度,含责任边界冲突解决与质量门禁联动)
- `FULL_GUIDE.md` 末尾追加:**测试架构合理性深度章节**(6 子节:金字塔 2024 现代版 / Shift-Left 7 层 / Shift-Right 9 层 / 可观测三柱 + 测试可视化 / 五层质量门禁 + Flaky vs Reruns 哲学 / 调整路径 Phase 2-4 落地点)
- 新建 `examples/web-demo/`:8 文件最小可跑 Web 测试示例(pytest + Playwright + Page Object,演示 `https://playwright.dev`,5 分钟跑通)
- - `FULL_GUIDE.md:395` 漏修补救:`utils/*.py(12 个)` → `49 个,含 __init__.py`
+ - `FULL_GUIDE.md:395` 漏修补救:`utils/*.py(12 个)` → `67 个,含 __init__.py`
### Notes
W1+W2+W3 合并提交:上架前必修安全 + 数字漂移修复 + URL 统一 + 信息架构重塑(FULL_GUIDE/INDEX/SPDX/gitleaks)。
后续 W4 博客 + Show HN 准备 待执行。
-> 注:本仓库 GitHub Actions CI 已配 `permissions: contents: read` 最小权限(F3);CodeQL 显式声明 per-job 权限。pre-commit 已含 `detect-private-key` + .env 防护 + 14/13/49 文件统计。
+> 注:本仓库 GitHub Actions CI 已配 `permissions: contents: read` 最小权限(F3);CodeQL 显式声明 per-job 权限。pre-commit 已含 `detect-private-key` + .env 防护 + 14/13/67 文件统计。
---
---
-## [1.0.0] - 2026-05-10
+## [v1.0.0] - 2026-05-10
### Added
- 14 测试专家 Agent(核心 9 + 平台扩展 5)
- 13 测试技能 Skill(通用 8 + 平台 5)
-- 49 utils Python 工具模块
+- 67 utils Python 工具模块
- GitHub Actions + Jenkins 双 CICD
- Dependabot 周扫描 + pip-audit/safety CVE 拦截
- 多格式 PRD 加载(md/pdf/docx/xlsx/zip/png/url/html/pptx)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 176760f..8883137 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,17 +6,17 @@
## 添加新 Agent
-1. 选定分类(核心通用 9 / 平台扩展 5)
+1. 选定分类(核心通用 9 / 平台扩展 5 / 垂直领域 2)
2. 文件命名 `15-XXX.md`(按编号递增)
-3. 顶部 YAML frontmatter(必含 `name` / `description` / `tools`)
+3. 顶部 YAML frontmatter(必含 `name` / `description` / `tools`;可选 `requires_layer: [base, <layer>]` 标注依赖层,值见 `docs/charter/05-install-deploy.md` 六层定义)
4. 编写:职责 / 工具栈 / Page Object 或调用模板 / 协作输出
5. **同步**:
- - `02-专家定义/README.md` 加一行
+ - `agents/README.md` 加一行
- `00-项目导航.md` 加一行
- `01-测试主管.md` 路由表(如平台扩展)
- `utils/prd_loader.PLATFORM_KEYWORDS` 加关键词(如平台扩展)
- `install.sh` agents 数组加文件名
- - `01-快速开始/部署说明.md` PowerShell + bash 拷贝清单加
+ - `docs/getting-started/部署说明.md` PowerShell + bash 拷贝清单加
---
@@ -24,7 +24,7 @@
1. 选定分类(通用 8 / 平台专项 5)
2. 文件命名 `-.md`(如 `chaos-test.md`)
-3. 顶部 YAML frontmatter
+3. 顶部 YAML frontmatter(可选 `requires_layer: [base, <layer>]` 标注依赖层,值见 `docs/charter/05-install-deploy.md` 六层定义)
4. 必含章节:
- 🔔 开测前准备清单(平台 skill 必有)
- 触发方式
@@ -33,12 +33,12 @@
- 质量门禁
- 输出文件
5. **同步**:
- - `03-技能定义/README.md` 加一行
+ - `skills/README.md` 加一行
- `00-项目导航.md` 加一行
- - `01-快速开始/使用手册.md` skill 详解段加描述
+ - `docs/getting-started/使用手册.md` skill 详解段加描述
- `01-测试主管.md` 快速命令清单加一行
- `install.sh` skills 数组加文件名
- - `01-快速开始/部署说明.md` 拷贝清单加
+ - `docs/getting-started/部署说明.md` 拷贝清单加
---
@@ -49,14 +49,14 @@
3. 顶部 docstring 标注被引用方
4. 必含:公开 API + CLI(argparse)
5. **同步**:
- - `05-代码示例/README.md` 表格加一行
+ - `utils/README.md` 表格加一行
- `00-项目导航.md` 对应分类加一行
- - `04-配置文件/requirements.txt` 加新依赖(标 [稳定层]/[可选]/[外部])
- - `04-配置文件/.env.example` 加配置字段
- - `04-配置文件/conftest.py` `pytest_configure` 加产出目录
- - `04-配置文件/pytest.ini` markers 加新标记
+ - `config/requirements.txt` 加新依赖(标 [稳定层]/[可选]/[外部])
+ - `config/.env.example` 加配置字段
+ - `config/conftest.py` `pytest_configure` 加产出目录
+ - `config/pytest.ini` markers 加新标记
- `install.sh` utils 数组 + 数字
- - `01-快速开始/部署说明.md` 拷贝清单 + 数字
+ - `docs/getting-started/部署说明.md` 拷贝清单 + 数字
---
@@ -71,9 +71,9 @@
## 添加新 .env 字段
-1. `04-配置文件/.env.example` 加(带注释)
-2. `01-快速开始/配置清单.md` 字段说明加一行
-3. `04-配置文件/conftest.py` `EnvConfig` 加字段(如功能必需)
+1. `config/.env.example` 加(带注释)
+2. `docs/getting-started/配置清单.md` 字段说明加一行
+3. `config/conftest.py` `EnvConfig` 加字段(如功能必需)
4. CI yml / Jenkins Credentials 同步(如 CI 需要)
---
@@ -110,10 +110,10 @@ perf(jmeter): 减少不必要心跳
## 自检脚本(一键验证项目完整性)
```bash
-ls 02-专家定义/[0-9]*.md | wc -l # 16(或 +N)
-ls 03-技能定义/*.md | grep -v README | wc -l # 32(或 +N,不含 3 个元 skill 子目录)
-ls 05-代码示例/*.py | wc -l # 49(或 +N,含 __init__.py)
-grep -c "^ [a-z_]+:" 04-配置文件/pytest.ini # markers 数
+ls agents/[0-9]*.md | wc -l # 16(或 +N)
+ls skills/*.md | grep -v README | wc -l # 32(或 +N,不含 3 个元 skill 子目录)
+ls utils/*.py | wc -l # 67(或 +N,含 __init__.py)
+grep -cE "^ +[a-z_]+:" config/pytest.ini # markers 数
python -c "from utils.api_retry_util import call_with_retry; print('OK')"
pytest --collect-only
```
@@ -128,17 +128,17 @@ pytest --collect-only
| 改动类型 | 必同步至 |
|---------|---------|
-| 新增/删除 Agent | `02-专家定义/README.md` + `00-项目导航.md` + `install.sh` agents 数组 + `01-快速开始/部署说明.md` 拷贝清单 + `01-测试主管.md` 路由表 + `prd_loader.PLATFORM_KEYWORDS` |
-| 新增/删除 Skill | `03-技能定义/README.md` + `00-项目导航.md` + `install.sh` skills 数组 + `01-快速开始/使用手册.md` skill 详解 + `01-测试主管.md` 快速命令清单 |
-| 新增/删除 utils | `05-代码示例/README.md` + `00-项目导航.md` + `requirements.txt` + `.env.example` + `conftest.py::pytest_configure` + `pytest.ini` markers + `install.sh` utils 数组 |
-| 数字变化(16/32+3 子目录/49) | grep 全项目 + 同步顶层 README/FULL_GUIDE/00-项目导航/ROADMAP/使用手册/部署说明/install.sh + ci.yml `file-count` job 校验 |
+| 新增/删除 Agent | `agents/README.md` + `00-项目导航.md` + `install.sh` agents 数组 + `docs/getting-started/部署说明.md` 拷贝清单 + `01-测试主管.md` 路由表 + `prd_loader.PLATFORM_KEYWORDS` |
+| 新增/删除 Skill | `skills/README.md` + `00-项目导航.md` + `install.sh` skills 数组 + `docs/getting-started/使用手册.md` skill 详解 + `01-测试主管.md` 快速命令清单 |
+| 新增/删除 utils | `utils/README.md` + `00-项目导航.md` + `requirements.txt` + `.env.example` + `conftest.py::pytest_configure` + `pytest.ini` markers + `install.sh` utils 数组 |
+| 数字变化(18/32+3 子目录/67) | grep 全项目 + 同步顶层 README/FULL_GUIDE/00-项目导航/ROADMAP/使用手册/部署说明/install.sh + ci.yml `file-count` job 校验 |
| URL/repo 名变化 | grep `Wool-xing/Test-Agent` 全替换 + `install.sh::REPO_URL` + `dependabot.yml` |
-| 门禁阈值变化 | `utils/ci_quality_gate.py::GATES` + `utils/jmeter_result_parser.py::DEFAULT_GATES_*` + `02-专家定义/01-测试主管.md::QUALITY_GATES` + 各 skill 门禁段 |
+| 门禁阈值变化 | `utils/ci_quality_gate.py::GATES` + `utils/jmeter_result_parser.py::DEFAULT_GATES_*` + `agents/01-测试主管.md::QUALITY_GATES` + 各 skill 门禁段 |
### 自动化保障
-- `pre-commit`:16/32/49 文件统计 + .env 防护 + gitleaks 凭据扫描 + ruff
-- `.github/workflows/ci.yml`:16/32/49 自校 + Markdown 链接有效性 + utils 导入
+- `pre-commit`:18/32/67 文件统计 + .env 防护 + gitleaks 凭据扫描 + ruff
+- `.github/workflows/ci.yml`:18/32/67 自校 + Markdown 链接有效性 + utils 导入
- `.github/workflows/codeql.yml`:python + GitHub Actions 安全扫描
### 提交前自检
@@ -152,7 +152,7 @@ pytest --collect-only
## RACI 协作矩阵(浓缩版)
-> 完整路由逻辑见 `02-专家定义/01-测试主管.md` PLATFORM_KEYWORDS 与 `02-专家定义/README.md` 流程依赖关系。
+> 完整路由逻辑见 `agents/01-测试主管.md` PLATFORM_KEYWORDS 与 `agents/README.md` 流程依赖关系。
### 缩写
@@ -180,47 +180,49 @@ pytest --collect-only
| VT | visual-tester | 平台扩展 |
| ST | system-tester | 平台扩展 |
| AT | ai-tester | 平台扩展 |
+| PT | pentest-tester | 垂直领域 |
+| AMT | automotive-tester | 垂直领域 |
### RACI 主表(测试维度 × 专家)
-| 测试维度 | TL | RA | TD | EM | DP | AE | TE | BM | RG | MT | DT | VT | ST | AT |
-|---------|----|----|----|----|----|----|----|----|----|----|----|----|----|----|
-| 需求分析 | A | R | C | I | I | I | I | I | I | C | C | C | C | C |
-| 用例设计-功能 | A | C | R | I | C | I | I | I | I | C | C | C | C | C |
-| 用例设计-非功能 | A | C | R | I | C | C | C | C | I | | | | | |
-| 环境准备 | A | I | I | R | C | C | C | I | I | C | C | | C | |
-| 数据准备 | A | I | C | C | R | C | C | I | I | C | | | C | C |
-| Web 自动化 | A | I | C | I | C | R | C | I | I | | | | | |
-| API 自动化 | A | I | C | I | C | R | C | I | I | | | | C | |
-| 性能(JMeter) | A | C | C | C | C | R | R | I | C | | | | | |
-| 移动端 | A | C | C | C | C | C | C | I | I | R | | | | |
-| 桌面端 | A | C | C | C | C | C | C | I | I | | R | | | |
-| 视觉/游戏 | A | C | C | C | C | C | C | I | I | | | R | | |
-| 系统/IoT/音视频 | A | C | C | C | C | C | C | I | I | | | | R | |
-| AI/LLM | A | C | C | C | C | C | C | I | I | | | | | R |
-| 安全(SAST/DAST/Fuzz) | A | I | R | C | I | C | C | R | I | | | | | C |
-| 兼容矩阵 | A | I | R | C | I | R | C | I | I | C | C | | | |
-| 弱网 | A | I | C | C | I | C | R | I | I | C | | | | |
-| 稳定 Soak | A | I | C | C | I | C | R | I | I | C | | | | |
-| 可靠性(重试/降级) | A | I | C | C | I | R | C | I | I | | | | | |
-| 混沌 | A | I | C | C | I | C | R | I | I | | | | | |
-| 灾备 Failover | A | I | C | R | I | C | R | I | I | | | | C | |
-| UX 度量 | A | I | R | C | I | R | C | I | C | | | | | |
-| 易用性(Nielsen) | A | I | R | C | I | C | I | I | I | | | | | |
-| 探索性 SBTM | A | I | R | C | C | C | C | C | I | | | | | |
-| Web Vitals | A | I | C | I | I | R | C | I | I | | | | | |
-| A11y 无障碍 | A | I | R | I | I | R | C | I | I | | | | | |
-| i18n / l10n | A | I | R | I | I | R | C | I | I | | | | | |
-| 数据库测试 | A | I | C | C | R | R | C | I | I | | | | | |
-| 契约测试 | A | C | R | I | C | R | C | I | I | | | | | |
-| 视觉回归 | A | I | C | I | I | C | C | I | I | | | R | | |
-| AI 对抗/越狱 | A | C | C | I | I | C | C | C | I | | | | | R |
-| 变异测试 | A | I | R | I | I | C | C | I | I | | | | | |
-| DORA / 度量 | A | I | C | I | I | C | R | R | R | | | | | |
-| Bug 提交 BugTracker | A | I | I | I | I | I | C | R | C | I | I | I | I | I |
-| 报告生成 | A | I | I | I | I | I | C | C | R | I | I | I | I | I |
-| 多端通知 | A | I | I | I | I | I | I | I | R | I | I | I | I | I |
-| **上线决策** | **R/A** | C | C | I | I | C | C | C | C | I | I | I | I | I |
+| 测试维度 | TL | RA | TD | EM | DP | AE | TE | BM | RG | MT | DT | VT | ST | AT | PT | AMT |
+|---------|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|-----|
+| 需求分析 | A | R | C | I | I | I | I | I | I | C | C | C | C | C | C | |
+| 用例设计-功能 | A | C | R | I | C | I | I | I | I | C | C | C | C | C | C | |
+| 用例设计-非功能 | A | C | R | I | C | C | C | C | I | | | | | | C | |
+| 环境准备 | A | I | I | R | C | C | C | I | I | C | C | | C | | C | |
+| 数据准备 | A | I | C | C | R | C | C | I | I | C | | | C | C | | |
+| Web 自动化 | A | I | C | I | C | R | C | I | I | | | | | | | |
+| API 自动化 | A | I | C | I | C | R | C | I | I | | | | C | | | |
+| 性能(JMeter) | A | C | C | C | C | R | R | I | C | | | | | | | |
+| 移动端 | A | C | C | C | C | C | C | I | I | R | | | | | | |
+| 桌面端 | A | C | C | C | C | C | C | I | I | | R | | | | | |
+| 视觉/游戏 | A | C | C | C | C | C | C | I | I | | | R | | | | |
+| 系统/IoT/音视频 | A | C | C | C | C | C | C | I | I | | | | R | | C | C |
+| AI/LLM | A | C | C | C | C | C | C | I | I | | | | | R | C | |
+| 安全(SAST/DAST/Fuzz) | A | I | R | C | I | C | C | R | I | | | | | C | R | |
+| 兼容矩阵 | A | I | R | C | I | R | C | I | I | C | C | | | | | C |
+| 弱网 | A | I | C | C | I | C | R | I | I | C | | | | | | |
+| 稳定 Soak | A | I | C | C | I | C | R | I | I | C | | | | | | |
+| 可靠性(重试/降级) | A | I | C | C | I | R | C | I | I | | | | | | | C |
+| 混沌 | A | I | C | C | I | C | R | I | I | | | | | | C | |
+| 灾备 Failover | A | I | C | R | I | C | R | I | I | | | | C | | C | C |
+| UX 度量 | A | I | R | C | I | R | C | I | C | | | | | | | |
+| 易用性(Nielsen) | A | I | R | C | I | C | I | I | I | | | | | | | |
+| 探索性 SBTM | A | I | R | C | C | C | C | C | I | | | | | | | |
+| Web Vitals | A | I | C | I | I | R | C | I | I | | | | | | | |
+| A11y 无障碍 | A | I | R | I | I | R | C | I | I | | | | | | | |
+| i18n / l10n | A | I | R | I | I | R | C | I | I | | | | | | | |
+| 数据库测试 | A | I | C | C | R | R | C | I | I | | | | | | | |
+| 契约测试 | A | C | R | I | C | R | C | I | I | | | | | | | |
+| 视觉回归 | A | I | C | I | I | C | C | I | I | | | R | | | | |
+| AI 对抗/越狱 | A | C | C | I | I | C | C | C | I | | | | | R | C | |
+| 变异测试 | A | I | R | I | I | C | C | I | I | | | | | | | |
+| DORA / 度量 | A | I | C | I | I | C | R | R | R | | | | | | | |
+| Bug 提交 BugTracker | A | I | I | I | I | I | C | R | C | I | I | I | I | I | I | I |
+| 报告生成 | A | I | I | I | I | I | C | C | R | I | I | I | I | I | I | I |
+| 多端通知 | A | I | I | I | I | I | I | I | R | I | I | I | I | I | I | I |
+| **上线决策** | **R/A** | C | C | I | I | C | C | C | C | I | I | I | I | I | I | I |
### 责任边界冲突解决
diff --git a/FULL_GUIDE.md b/FULL_GUIDE.md
index 3a7f53e..98e40c7 100644
--- a/FULL_GUIDE.md
+++ b/FULL_GUIDE.md
@@ -5,9 +5,9 @@
> **维护原则**:决策入档、开放问题入档、不打脸的承诺才写。重大决策须更新「📋 开放问题」与「🗺️ 项目当前状态」两节。
**项目名称**:`Test-Agent`(内部代号 `test-agent-team`)
-**当前阶段**:Phase 2 前期(V1.32.5 · 16 expert + 23/32 skill production + 0 rollout + 2 vision;V1.x rollout 收尾)
-**版本**:V1.32.5(详见 [VERSION](VERSION) + [CHANGELOG.md](CHANGELOG.md))
-**更新日期**:2026-05-17
+**当前阶段**:Phase 2 前期(V1.36.0 · 16 expert + 30/32 skill active (23 production + 7 script) + 0 rollout + 2 vision;V1.x rollout 收尾)
+**版本**:V1.36.0(详见 [VERSION](VERSION) + [CHANGELOG.md](CHANGELOG.md))
+**更新日期**:2026-05-18
**模型**:Claude 4.x 系列(Opus 4.7 / Sonnet 4.6 / Haiku 4.5,由 Claude Code 默认管理)
---
@@ -26,7 +26,7 @@
| 04 | [skills-bugtracker](docs/charter/04-skills-bugtracker.md) | Skills 自进化机制 + Bug Tracker 多适配器 |
| 05 | [install-deploy](docs/charter/05-install-deploy.md) | 按需安装 + 架构图 + 快速开始 + 工作流 + 技术栈 + 闭环 + 升级 + 协作 + 跨 AI |
| 06 | [test-architecture](docs/charter/06-test-architecture.md) | 测试架构深度 + 关键反问 + 开放问题 + 术语表 |
-| 07 | [runtime-license](docs/charter/07-runtime-license.md) | V1.32.5 运行时层 + LICENSE / CHANGELOG / 项目当前状态 |
+| 07 | [runtime-license](docs/charter/07-runtime-license.md) | V1.36.0 运行时层 + LICENSE / CHANGELOG / 项目当前状态 |
## 跨文件链接迁移指引
diff --git a/NOTICE.md b/NOTICE.md
index 9410484..fa509b5 100644
--- a/NOTICE.md
+++ b/NOTICE.md
@@ -2,19 +2,19 @@
Test-Agent 本体 MIT License。以下组件保留各自上游协议。
-## Upstream Skills(项目内 03-技能定义/ 子目录)
+## Upstream Skills(项目内 skills/ 子目录)
| 路径 | 上游 | 协议 |
|------|------|------|
-| `03-技能定义/darwin-skill/` | [alchaincyf/darwin-skill](https://github.com/alchaincyf/darwin-skill) | MIT |
-| `03-技能定义/karpathy-guidelines/` | [forrestchang/andrej-karpathy-skills](https://github.com/forrestchang/andrej-karpathy-skills) | MIT |
-| `03-技能定义/nuwa-skill/` | [alchaincyf/nuwa-skill](https://github.com/alchaincyf/nuwa-skill) | MIT |
+| `skills/darwin-skill/` | [alchaincyf/darwin-skill](https://github.com/alchaincyf/darwin-skill) | MIT |
+| `skills/karpathy-guidelines/` | [forrestchang/andrej-karpathy-skills](https://github.com/forrestchang/andrej-karpathy-skills) | MIT |
+| `skills/nuwa-skill/` | [alchaincyf/nuwa-skill](https://github.com/alchaincyf/nuwa-skill) | MIT |
各子目录含本地 `LICENSE` 副本(完整 MIT 全文 + 上游作者署名)。
## Python 依赖(主要)
-详见 `04-配置文件/requirements.txt`。常用:
+详见 `config/requirements.txt`。常用:
- pytest(MIT)/ Playwright(Apache 2.0)/ Appium(Apache 2.0)/ pywinauto(BSD-3-Clause)
- JMeter(Apache 2.0)/ Allure(Apache 2.0)— 外部安装
diff --git a/README.md b/README.md
index 67582cb..eb18648 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ tagent init --preset saas-web # or: minimal / mobile-android / security-pent
# → produces .env + tagent.yml + STARTUP.md (5-step onboarding guide)
```
-Matrix-driven config: 8 test types × 6 platforms × 5 LLMs × 6 trackers × 6 channels. In practice, ~12 common combinations are tested in CI; the full 8640-grid is a config matrix, not a coverage claim. See [`04-配置文件/templates/INDEX.md`](04-配置文件/templates/INDEX.md).
+Matrix-driven config: 8 test types × 6 platforms × 5 LLMs × 6 trackers × 6 channels. In practice, ~12 common combinations are tested in CI; the full 8640-grid is a config matrix, not a coverage claim. See [`config/templates/INDEX.md`](config/templates/INDEX.md).
---
@@ -43,8 +43,8 @@ Test-Agent turns any software, EXE, APK, Docker image, or API into a **fully tes
- **16 expert agents** (11 production + 5 script — V1.x rollout 收尾,见 [ROADMAP.md](ROADMAP.md)) — functional · security · mobile · desktop · AI model · automotive · pentest …
- **30 active skills** (23 production + 7 script) **+ 2 vision-only** (reference, not executable) **+ 3 meta-skills** — TDD · E2E · regression · pentest · car-CAN-bus · eval-harness · …
-- **49 production utils** — pytest · Playwright · JMeter · Appium · Burp · Allure · OpenCV · …
-- **Multi-LLM (any provider, plug-and-play)** — 6 built-in (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama) + **OpenAI-compatible fallback channel** for any other provider (Zhipu / Doubao / Kimi / Baichuan / Xunfei / …) via 3 env vars, zero code change. Cookbook: [`04-配置文件/llm-providers.md`](04-配置文件/llm-providers.md)
+- **78 production utils** — pytest · Playwright · JMeter · Appium · Burp · Allure · OpenCV · …
+- **Multi-LLM (any provider, plug-and-play)** — 6 built-in (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama) + **OpenAI-compatible fallback channel** for any other provider (Zhipu / Doubao / Kimi / Baichuan / Xunfei / …) via 3 env vars, zero code change. Cookbook: [`config/llm-providers.md`](config/llm-providers.md)
- **BugTracker** — 1 active adapter (Zentao); 5 planned (Jira · GitHub · GitLab · Linear · Webhook, see roadmap)
- **6 notify channels** — WeChat Work · Lark/Feishu · DingTalk · Slack · Email · MS Teams
- **MCP integration** — 6 server modules implemented (test-orchestrator active by default; 5 others ready to enable in `.mcp.json`)
@@ -72,7 +72,7 @@ No Python/Node/Docker required. Download the installer for your platform and sta
1. **All-platform** — Web / API / Android / iOS / WeChat-miniprogram / Windows EXE / macOS / Linux / Electron / game / IoT / audio-video / AI/LLM / blockchain / 车载
2. **All-protocol** — HTTP(S) / gRPC / WebSocket / TCP / UDP / GraphQL / SOAP / MQTT / SSH / serial / Kafka / RabbitMQ / Modbus / CAN-bus / SOME-IP / DoIP / UDS
-3. **Multi-LLM no lock-in (any provider)** — 6 built-in providers via `tagent config use ` (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama) plus **OpenAI-compatible fallback** via `tagent config use-compat` for any other (Zhipu / Doubao / Kimi / Baichuan / Xunfei / …) — 3 env vars, zero code change. See [`04-配置文件/llm-providers.md`](04-配置文件/llm-providers.md)
+3. **Multi-LLM no lock-in (any provider)** — 6 built-in providers via `tagent config use <provider>` (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama) plus **OpenAI-compatible fallback** via `tagent config use-compat` for any other (Zhipu / Doubao / Kimi / Baichuan / Xunfei / …) — 3 env vars, zero code change. See [`config/llm-providers.md`](config/llm-providers.md)
4. **Learn while using** — `--mode learn` outputs every step with theory references (22 KB cards across 13 domains: tools / coding / foundation / strategy / methods / protocols / platforms / gates / security / AI testing / compliance / process / build-your-own)
5. **Safe-by-default** — sandboxed exec / PII scrub / runtime prompt-injection scan / 4-gate marketplace verify / decisions audit trail
@@ -100,12 +100,12 @@ For project design rationale, architecture decisions, and methodology rationale,
```text
Test-Agent/
├── 00-项目导航.md ← 5-dimension category guide
-├── 01-快速开始/ ← user manual / deploy / config / deliverables
-├── 02-专家定义/ ← 16 expert agents (11 production + 5 script, V1.x rollout 收尾)
-├── 03-技能定义/ ← 32 business skills (23 production + 7 script + 0 rollout + 2 vision) + 3 meta-skills
-├── 04-配置文件/ ← conftest / pytest.ini / .env / .mcp.json
-├── 05-代码示例/ ← 49 production utils
-├── 06-CICD集成/ ← GitHub Actions + Jenkins
+├── docs/getting-started/ ← user manual / deploy / config / deliverables
+├── agents/ ← 16 expert agents (11 production + 5 script, V1.x rollout 收尾)
+├── skills/ ← 32 business skills (23 production + 7 script + 0 rollout + 2 vision) + 3 meta-skills
+├── config/ ← conftest / pytest.ini / .env / .mcp.json
+├── utils/ ← 78 production utils
+├── ci/ ← GitHub Actions + Jenkins
├── runtime/ ← V1.x runtime layer (router / orchestrator / MCP / web / scheduler / subagent / learning_loop / backends / gateway / tutor / essence_watcher / marketplace)
├── docs/charter/ ← Vision charter (7 split files: vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license)
├── docs/theory/ ← 22 teaching KB cards across 13 categories
@@ -127,11 +127,11 @@ Test-Agent/
| Audience | Read |
|----------|------|
-| **First-time user** | [Quick start](01-快速开始/INDEX.md) → [Deploy](01-快速开始/部署说明.md) |
-| **QA engineer** | [User manual](01-快速开始/使用手册.md) → [Skill catalog](03-技能定义/) |
+| **First-time user** | [Quick start](docs/getting-started/INDEX.md) → [Deploy](docs/getting-started/部署说明.md) |
+| **QA engineer** | [User manual](docs/getting-started/使用手册.md) → [Skill catalog](skills/) |
| **Architect / SRE** | [Architecture deep-dive](docs/charter/06-test-architecture.md) → [Runtime](docs/charter/07-runtime-license.md) → [Runtime modules](runtime/INDEX.md) |
-| **Security researcher** | [Pentest expert](02-专家定义/15-渗透测试.md) → [pentest-coordinator](03-技能定义/pentest-coordinator.md) |
-| **Automotive tester** | [Automotive expert](02-专家定义/16-车载测试.md) → [ASIL workflow](03-技能定义/automotive-test.md) |
+| **Security researcher** | [Pentest expert](agents/15-渗透测试.md) → [pentest-coordinator](skills/pentest-coordinator.md) |
+| **Automotive tester** | [Automotive expert](agents/16-车载测试.md) → [ASIL workflow](skills/automotive-test.md) |
| **Contributor** | [CONTRIBUTING.md](CONTRIBUTING.md) → [Marketplace](marketplace/INDEX.md) |
## 🛠️ Tech Stack
diff --git a/README.zh-CN.md b/README.zh-CN.md
index 143cba8..b8a1c4a 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -33,7 +33,7 @@ tagent init --preset 国内-web # 或:minimal / saas-web / mobile-android / s
# → 产 .env + tagent.yml + STARTUP.md(5 步上手指南)
```
-矩阵驱动配置:8 测试类型 × 6 平台 × 5 LLM × 6 tracker × 6 通道。实际 CI 验证约 12 种常用组合;完整 8640 网格是配置矩阵,不是覆盖率声明。见 [`04-配置文件/templates/INDEX.md`](04-配置文件/templates/INDEX.md)。
+矩阵驱动配置:8 测试类型 × 6 平台 × 5 LLM × 6 tracker × 6 通道。实际 CI 验证约 12 种常用组合;完整 8640 网格是配置矩阵,不是覆盖率声明。见 [`config/templates/INDEX.md`](config/templates/INDEX.md)。
---
@@ -44,7 +44,7 @@ Test-Agent 让任何软件 / EXE / APK / Docker 镜像 / API,变成**完整测
- **16 专家 Agent** (11 production + 5 script — V1.x rollout 收尾,见 [ROADMAP.md](ROADMAP.md)) — 功能 · 安全 · 移动 · 桌面 · AI 模型 · 车载 · 渗透 ……
- **30 个活跃 Skill** (23 production + 7 script) **+ 2 个 vision-only**(方法论参考,不可执行)**+ 3 元 Skill** — TDD · E2E · 回归 · 渗透 · 车载 CAN · eval-harness ……
- **78 生产工具** — pytest · Playwright · JMeter · Appium · Burp · Allure · OpenCV ……
-- **多 LLM(任厂商即插即用)** — 内置 6 厂商 (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama) + **OpenAI 兼容兜底通道** 接其他任意厂商 (智谱 / 豆包 / Kimi / 百川 / 讯飞 / …), 3 个 env 变量, 零代码改动. 速查手册: [`04-配置文件/llm-providers.md`](04-配置文件/llm-providers.md)
+- **多 LLM(任厂商即插即用)** — 内置 6 厂商 (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama) + **OpenAI 兼容兜底通道** 接其他任意厂商 (智谱 / 豆包 / Kimi / 百川 / 讯飞 / …), 3 个 env 变量, 零代码改动. 速查手册: [`config/llm-providers.md`](config/llm-providers.md)
- **BugTracker** — 1 已实装(禅道);5 计划(Jira · GitHub · GitLab · Linear · Webhook,见 roadmap)
- **6 通知渠道** — 企微 · 飞书 · 钉钉 · Slack · 邮件 · Teams
- **MCP 集成** — 6 模块已实现(test-orchestrator 默认启用;其余 5 件套在 `.mcp.json` 中按需启用)
@@ -72,7 +72,7 @@ curl -fsSL https://raw.githubusercontent.com/Wool-xing/Test-Agent/main/install.s
1. **全平台** — Web / API / Android / iOS / 微信小程序 / Windows EXE / macOS / Linux / Electron / 游戏 / IoT / 音视频 / AI/LLM / 区块链 / 车载
2. **全协议** — HTTP(S) / gRPC / WebSocket / TCP / UDP / GraphQL / SOAP / MQTT / SSH / 串口 / Kafka / RabbitMQ / Modbus / CAN-bus / SOME-IP / DoIP / UDS
-3. **多 LLM 无锁定(任厂商)** — `tagent config use ` 切换 6 内置 (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama), `tagent config use-compat` 接 **OpenAI 兼容兜底** 任意厂商 (智谱 / 豆包 / Kimi / 百川 / 讯飞 / …), 3 个 env 零代码. 见 [`04-配置文件/llm-providers.md`](04-配置文件/llm-providers.md)
+3. **多 LLM 无锁定(任厂商)** — `tagent config use <provider>` 切换 6 内置 (Claude / OpenAI / Gemini / Qwen / DeepSeek / Ollama), `tagent config use-compat` 接 **OpenAI 兼容兜底** 任意厂商 (智谱 / 豆包 / Kimi / 百川 / 讯飞 / …), 3 个 env 零代码. 见 [`config/llm-providers.md`](config/llm-providers.md)
4. **边用边学** — `--mode learn` 每步输出含**理论引用**(22 卡跨 13 大类:工具 / 编程 / 基础理论 / 策略 / 方法 / 协议 / 平台 / 门禁 / 安全 / AI 测试 / 合规 / 流程 / Build-Your-Own)
5. **safe-by-default** — 沙箱 / PII 脱敏 / 运行时 Prompt 注入扫描 / 4 关 Marketplace 验证 / decisions 审计链
@@ -94,12 +94,12 @@ curl -fsSL https://raw.githubusercontent.com/Wool-xing/Test-Agent/main/install.s
```text
Test-Agent/
├── 00-项目导航.md ← 5 维度分类速查
-├── 01-快速开始/ ← 使用手册 / 部署 / 配置 / 交付物
-├── 02-专家定义/ ← 16 个专家 Agent (11 production + 5 script + 0 rollout)
-├── 03-技能定义/ ← 32 个业务 Skill (23 production + 7 script + 0 rollout + 2 vision) + 3 个元 Skill
-├── 04-配置文件/ ← conftest / pytest.ini / .env / .mcp.json
-├── 05-代码示例/ ← 49 个生产工具
-├── 06-CICD集成/ ← GitHub Actions + Jenkins
+├── docs/getting-started/ ← 使用手册 / 部署 / 配置 / 交付物
+├── agents/ ← 16 个专家 Agent (11 production + 5 script + 0 rollout)
+├── skills/ ← 32 个业务 Skill (23 production + 7 script + 0 rollout + 2 vision) + 3 个元 Skill
+├── config/ ← conftest / pytest.ini / .env / .mcp.json
+├── utils/ ← 78 个生产工具
+├── ci/ ← GitHub Actions + Jenkins
├── runtime/ ← V1.x 运行时(router/orchestrator/MCP/web/scheduler/subagent/learning_loop/backends/gateway/tutor/essence_watcher/marketplace)
├── docs/charter/ ← 愿景宪章(7 子文件: vision-dimensions / coverage-matrix / agentchat-protocol / skills-bugtracker / install-deploy / test-architecture / runtime-license)
├── docs/theory/ ← 22 教学 KB 卡片跨 13 大类
@@ -121,11 +121,11 @@ Test-Agent/
| 角色 | 阅读 |
|------|------|
-| **首次用户** | [快速开始](01-快速开始/INDEX.md) → [部署说明](01-快速开始/部署说明.md) |
-| **QA 工程师** | [使用手册](01-快速开始/使用手册.md) → [Skill 目录](03-技能定义/) |
+| **首次用户** | [快速开始](docs/getting-started/INDEX.md) → [部署说明](docs/getting-started/部署说明.md) |
+| **QA 工程师** | [使用手册](docs/getting-started/使用手册.md) → [Skill 目录](skills/) |
| **架构师 / SRE** | [架构深度](docs/charter/06-test-architecture.md) → [Runtime 章节](docs/charter/07-runtime-license.md) → [Runtime 模块](runtime/INDEX.md) |
-| **安全研究员** | [渗透专家](02-专家定义/15-渗透测试.md) → [pentest-coordinator](03-技能定义/pentest-coordinator.md) |
-| **车载测试** | [车载专家](02-专家定义/16-车载测试.md) → [ASIL 工作流](03-技能定义/automotive-test.md) |
+| **安全研究员** | [渗透专家](agents/15-渗透测试.md) → [pentest-coordinator](skills/pentest-coordinator.md) |
+| **车载测试** | [车载专家](agents/16-车载测试.md) → [ASIL 工作流](skills/automotive-test.md) |
| **贡献者** | [CONTRIBUTING.md](CONTRIBUTING.md) → [Marketplace](marketplace/INDEX.md) |
## 🛠️ 技术栈
diff --git a/ROADMAP.md b/ROADMAP.md
index 361ee8c..a405f56 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -1,7 +1,7 @@
# Test-Agent V1.x ROADMAP
> 项目终态目标:每个 expert / skill 真 LLM-driven / script-backed 实装,**绝不输出 mock 数据**。
-> 当前状态:V1.32.5 (**expert rollout 收尾 + skill rollout 完成(16/16)**)
+> 当前状态:V1.42.0 (**expert rollout 收尾 + skill rollout 完成(16/16)+ Phase 3 完成 + Phase 4 完成 + Phase 5 完成**)
> - **expert 16/16 active**(11 production + 5 script);0 rollout。
> - **skill 30/32 active**(23 production + 7 script);0 rollout;2 暂为 V2 vision 方法论参考。
> - 3 meta-skill(nuwa-skill / darwin-skill / karpathy-guidelines)独立,工具属性,不在 32 业务 skill 数内。
@@ -129,6 +129,14 @@
---
+## V1.34-V1.36 能力扩展
+
+- **V1.34**: script_bridge.py 桥接 5 独立脚本进 orchestrator pipeline
+- **V1.35**: 11 深度审计模块 (flaky guard / API security v2 / data factory v2 / perf orchestrator / event harness / visual regression / ML prioritizer / differential tester / EU AI Act / supply chain)
+- **V1.36**: 6 延期模块 (chaos v2 / state machine v2 / DB test v2 / BDD v2 / carbon scheduler / canary config) + CVE-2025-71176 fix + 深度审查65发现全修
+
+---
+
## V2.x vision — 2 skill(暂留方法论参考形态)
| Skill | 当前形态 | V2 路线 |
@@ -162,11 +170,11 @@
**绝不输出 mock 数据糊弄用户。**
V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地:
-- **registry 单源**: catalog 解析 `02-专家定义/*.md` `EXPERT_IMPL_STATUS` + `03-技能定义/*.md` `SKILL_IMPL_STATUS` frontmatter,实装状态来源唯一
+- **registry 单源**: catalog 解析 `agents/*.md` `EXPERT_IMPL_STATUS` + `skills/*.md` `SKILL_IMPL_STATUS` frontmatter,实装状态来源唯一
- **router 软警告**: `_validate_against_catalog` 检测 rollout / vision / unknown → 加 issues 并降 confidence 0.3
- **orchestrator 硬拒**: `execute_node` 对 expert / skill 任意 rollout / vision / unknown 返回 `returncode=2` + stderr "未实装",绝不走 no-op "documented step recorded" 假成功路径
- 用户路由 0 个 in-rollout expert / 0 个 in-rollout skill / 2 个 vision skill 时**收到明确说明**,而非伪装成"已运行"的 mock 输出
-- 详情见 [02-专家定义/01-测试主管.md](02-专家定义/01-测试主管.md) 路由表注释
+- 详情见 [agents/01-测试主管.md](agents/01-测试主管.md) 路由表注释
---
@@ -193,4 +201,20 @@ V1.14.0+1 (PR X4) 起,双 layer 防 mock 已落地:
| V1.29.0 | 2026-05-16 | **skill rollout #8** — pentest-exploit + pentest-report 双 skill LLM-driven 生产落地 (exploit: 沙箱内验证 PoC + 不可破坏性约束 · report: working PoC 嵌入 + CWE/CVSS/PoC/修复 4 维) | 16 expert + 18/32 production |
| V1.30.0 | 2026-05-16 | **skill rollout #9** — automotive-test + automotive-can-bus-test 双 skill LLM-driven 生产落地 (主编排: 10 阶段 HARA→报告 · CAN: CAN/CAN-FD/SOME-IP 协议一致性 + dbc 解析) | 16 expert + 20/32 production |
| V1.31.0 | 2026-05-16 | **skill rollout #10 (收尾)** — automotive-adas-scenario + automotive-ota-update-test + automotive-hil-loop-test 3 skill LLM-driven 生产落地 (ADAS: AEB/ACC/LKA + CARLA 仿真 · OTA: 6 校验 + UN R156/GB 44496 合规 · HIL: MIL/SIL/HIL 三环 + dSPACE). **V1.x rollout 完成 — 23/32 production + 7 script + 0 rollout + 2 vision.** | 16 expert + 23/32 production (0 rollout 待) |
+| V1.32.0 | 2026-05-17 | 深审32发现全修 + 版本号全同步 + 私源泄漏清洗 | 16 expert + 23/32 production |
+| V1.32.1 | 2026-05-17 | CONTRIBUTING skill count 33→32 fix + 版本号同步 | 16 expert + 23/32 production |
+| V1.32.2 | 2026-05-17 | Security hardening batch: CWE-78 fix + credential removal + CORS + WebSocket leak + XML escape | 16 expert + 23/32 production |
+| V1.32.3 | 2026-05-17 | Refactor: _stub_response dispatch table + fuzzer ALL_PAYLOADS hoist | 16 expert + 23/32 production |
+| V1.32.4 | 2026-05-17 | Honesty pass: remove aspirational numbers + split overlong functions | 16 expert + 23/32 production |
+| V1.32.5 | 2026-05-17 | Security: shell injection + hardcoded creds + silent failures | 16 expert + 23/32 production |
+| V1.33.0 | 2026-05-17 | MASTER_PLAN 38/38 items across 8 phases complete | 16 expert + 23/32 production |
+| V1.34.0 | 2026-05-18 | Phase 1-5 initial audit: 18 additions (settings/IDE/Docker/Prometheus/streaming/PBT/contract/schema fuzz/compliance/DORA) | 16 expert + 23/32 production |
+| V1.35.0 | 2026-05-18 | Deep audit 11 core modules (flaky guard/API security v2/data factory v2/perf/e2e event harness/visual regression/ML prioritizer/differential/EU AI Act/supply chain) | 16 expert + 23/32 production |
+| V1.36.0 | 2026-05-18 | Remaining 6 deferred modules + CVE-2025-71176 fix + 深度审查65发现全修 | 16 expert + 30/32 active (23 production + 7 script) |
+| V1.37.0 | 2026-05-18 | Phase 2 charter closure: Bug 5适配器(YAML门禁+按需安装) + HIGH 2(H16/H18) + MEDIUM 4(M12/M14/M15/M19) + contract gate + utils tests | 16 expert + 30/32 active · Phase 2 complete |
+| V1.38.0 | 2026-05-18 | Phase 3.1 伦理/偏见审计: fairness_auditor.py (dataset bias + 6 model fairness metrics + intersectional + decision audit) + 20 tests + ai_validator bias audit pipeline | 16 expert + 30/32 active · 1/3 Phase 3 done |
+| V1.39.0 | 2026-05-18 | Phase 3.2 沉默故障检测: silent_failure_detector.py (threshold drift + Mann-Kendall + OLS trend + sliding window + multi-source batch) + 21 tests + tracing/web_vitals/prometheus collectors | 16 expert + 30/32 active · 2/3 Phase 3 done |
+| V1.40.0 | 2026-05-18 | Phase 3.3 缺席者场景注入: absentee_scenario_injector.py (9 absentee groups × 21 canonical scenarios + charter generation + coverage reporting) + 20 tests | 16 expert + 30/32 active · PHASE 3 COMPLETE |
+| V1.41.0 | 2026-05-19 | Phase 4 证据链可采信性: evidence_chain.py (SHA-256 hash chain + multi-source collection + ISO 27001/SOC2/NIST 800-53/GDPR compliance mapping + JSON/Markdown export + integrity verification) + 39 tests + ai_validator evidence chain audit pipeline | 16 expert + 30/32 active · PHASE 4 DELIVERED |
+| V1.42.0 | 2026-05-19 | Phase 5 神圣性与跨文化禁忌: taboo_matrix.py (135 entries × 16 locales × 5 dimensions: words/colors/numbers/holidays/sacred_contexts) + i18n_checker taboo audit extension (6 functions) + 84 tests | 16 expert + 30/32 active · PHASE 5 DELIVERED |
| V2.0.0 | TBD | V2.x 路线图启动 | 16/16 + V2 |
diff --git a/SECURITY.md b/SECURITY.md
index b014f98..764624a 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -24,7 +24,7 @@
- 标题约定:`[SECURITY] 漏洞简述`
- 正文:详细复现步骤 + 影响范围 + PoC(如有)
-### 响应时间
+### 响应时间(best-effort,志愿者维护)
| 严重级别 | 响应 | 修复 |
|---------|------|------|
@@ -66,11 +66,11 @@
| 资产 | 类型 |
|------|------|
-| `02-专家定义/15-渗透测试.md` | 渗透测试 Agent(调用 sqlmap / Metasploit / Hydra 等真实攻击工具) |
-| `03-技能定义/pentest-*.md`(7 项) | 渗透 skill 系列(api / coordinator / exploit / recon / report / vuln / web) |
-| `05-代码示例/api_security_scanner.py` | API 安全扫描器(SSRF / IDOR / JWT / CSRF; 默认 refuse,需 `TAGENT_PENTEST_AUTHORIZED=1` + AWS metadata 探针需 `confirm_metadata_probe=True`) |
-| `05-代码示例/ai_adversarial.py` | AI 对抗测试 / LLM 越狱 / Prompt Injection / 成员推断攻击(含 JAILBREAK_PROMPTS + PROMPT_INJECTION_TEMPLATES 模板; 4 个远端 op 默认 refuse,需 `TAGENT_PENTEST_AUTHORIZED=1`; `test_llm_jailbreak` / `test_prompt_injection` / `membership_inference_basic` 三个 HIGH 风险 op 额外需 `confirm_offensive=True` 或 `confirm_inference_attack=True` kwarg) |
-| `05-代码示例/security_scanner.py` | 通用安全扫描器(调用 ZAP / Burp) |
+| `agents/15-渗透测试.md` | 渗透测试 Agent(调用 sqlmap / Metasploit / Hydra 等真实攻击工具) |
+| `skills/pentest-*.md`(7 项) | 渗透 skill 系列(api / coordinator / exploit / recon / report / vuln / web) |
+| `utils/api_security_scanner.py` | API 安全扫描器(SSRF / IDOR / JWT / CSRF; 默认 refuse,需 `TAGENT_PENTEST_AUTHORIZED=1` + AWS metadata 探针需 `confirm_metadata_probe=True`) |
+| `utils/ai_adversarial.py` | AI 对抗测试 / LLM 越狱 / Prompt Injection / 成员推断攻击(含 JAILBREAK_PROMPTS + PROMPT_INJECTION_TEMPLATES 模板; 4 个远端 op 默认 refuse,需 `TAGENT_PENTEST_AUTHORIZED=1`; `test_llm_jailbreak` / `test_prompt_injection` / `membership_inference_basic` 三个 HIGH 风险 op 额外需 `confirm_offensive=True` 或 `confirm_inference_attack=True` kwarg) |
+| `utils/security_scanner.py` | 通用安全扫描器(调用 ZAP / Burp) |
**操作者必须**:
@@ -89,9 +89,9 @@
| utils 文件 | env var | 守护操作 | 额外约束 |
|------|------|------|------|
-| `05-代码示例/chaos_helper.py` | `TAGENT_CHAOS_AUTHORIZED=1` | 混沌注入 + path / host validation | – |
-| `05-代码示例/db_test_helper.py` | `TAGENT_DB_TEST_AUTHORIZED=1` | `explain_query` / `benchmark_query` / `test_migration` / `test_postgres_backup_restore` | `test_postgres_backup_restore` 额外需 `confirm_destructive=True` kwarg;SQL identifier + cmd 双白名单 |
-| `05-代码示例/desktop_driver.py` | `TAGENT_DESKTOP_AUTHORIZED=1`(仅 macOS ops) | macOS: `open_macos_app` / `macos_menu`;跨平台: `get_windows_app` / `launch_electron` 路径校验 | macOS ops 需 platform=darwin + AppleScript identifier 白名单;跨平台 driver 接受的 exe / executable 路径必须绝对 + 存在 + 普通文件 + 非 symlink |
+| `utils/chaos_helper.py` | `TAGENT_CHAOS_AUTHORIZED=1` | 混沌注入 + path / host validation | – |
+| `utils/db_test_helper.py` | `TAGENT_DB_TEST_AUTHORIZED=1` | `explain_query` / `benchmark_query` / `test_migration` / `test_postgres_backup_restore` | `test_postgres_backup_restore` 额外需 `confirm_destructive=True` kwarg;SQL identifier + cmd 双白名单 |
+| `utils/desktop_driver.py` | `TAGENT_DESKTOP_AUTHORIZED=1`(仅 macOS ops) | macOS: `open_macos_app` / `macos_menu`;跨平台: `get_windows_app` / `launch_electron` 路径校验 | macOS ops 需 platform=darwin + AppleScript identifier 白名单;跨平台 driver 接受的 exe / executable 路径必须绝对 + 存在 + 普通文件 + 非 symlink |
**与武器化代码区分**: 上述 utils 设计用途是**测试**而非**攻击**,但调用时仍执行任意 SQL / shell / AppleScript。env var gate 是误调防护,不豁免操作者的环境隔离责任。
diff --git a/VERSION b/VERSION
index 949ff32..a50908c 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.32.5
+1.42.0
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/01-\346\265\213\350\257\225\344\270\273\347\256\241.md" "b/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md"
similarity index 99%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/01-\346\265\213\350\257\225\344\270\273\347\256\241.md"
rename to "agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md"
index 4cf6ede..e384963 100644
--- "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/01-\346\265\213\350\257\225\344\270\273\347\256\241.md"
+++ "b/agents/01-\346\265\213\350\257\225\344\270\273\347\256\241.md"
@@ -5,7 +5,7 @@ tools: Read, Write, Bash, Grep, Glob
EXPERT_IMPL_STATUS: production
---
-> ℹ️ **V1.32.5 实装状态**: 16 expert 全部 **16 active**(11 production + 5 script-backed),**0 rollout(V1.x rollout 收尾)**。详见 [ROADMAP.md](../ROADMAP.md)。
+> ℹ️ **V1.36.0 实装状态**: 16 expert 全部 **16 active**(11 production + 5 script-backed),**0 rollout(V1.x rollout 收尾)**。详见 [ROADMAP.md](../ROADMAP.md)。
> runtime/router + orchestrator 防 mock 已落地 — 路由到未实装 expert 返回明确「未实装」说明,不输出 mock 数据。
你是一位拥有15年经验的测试技术总监,带领过多个大型互联网项目的测试团队。你深谙测试工程化,善于风险识别、资源调度和质量决策。
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/02-\351\234\200\346\261\202\345\210\206\346\236\220.md" "b/agents/02-\351\234\200\346\261\202\345\210\206\346\236\220.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/02-\351\234\200\346\261\202\345\210\206\346\236\220.md"
rename to "agents/02-\351\234\200\346\261\202\345\210\206\346\236\220.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/03-\347\224\250\344\276\213\350\256\276\350\256\241.md" "b/agents/03-\347\224\250\344\276\213\350\256\276\350\256\241.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/03-\347\224\250\344\276\213\350\256\276\350\256\241.md"
rename to "agents/03-\347\224\250\344\276\213\350\256\276\350\256\241.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/04-\347\216\257\345\242\203\347\256\241\347\220\206.md" "b/agents/04-\347\216\257\345\242\203\347\256\241\347\220\206.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/04-\347\216\257\345\242\203\347\256\241\347\220\206.md"
rename to "agents/04-\347\216\257\345\242\203\347\256\241\347\220\206.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/05-\346\225\260\346\215\256\345\207\206\345\244\207.md" "b/agents/05-\346\225\260\346\215\256\345\207\206\345\244\207.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/05-\346\225\260\346\215\256\345\207\206\345\244\207.md"
rename to "agents/05-\346\225\260\346\215\256\345\207\206\345\244\207.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/06-\350\207\252\345\212\250\345\214\226\350\204\232\346\234\254.md" "b/agents/06-\350\207\252\345\212\250\345\214\226\350\204\232\346\234\254.md"
similarity index 98%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/06-\350\207\252\345\212\250\345\214\226\350\204\232\346\234\254.md"
rename to "agents/06-\350\207\252\345\212\250\345\214\226\350\204\232\346\234\254.md"
index 96ab0c3..4861d8f 100644
--- "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/06-\350\207\252\345\212\250\345\214\226\350\204\232\346\234\254.md"
+++ "b/agents/06-\350\207\252\345\212\250\345\214\226\350\204\232\346\234\254.md"
@@ -16,7 +16,7 @@ project_root/
├── conftest.py # 唯一权威 conftest(项目根)
├── pytest.ini
├── .env
-├── utils/ # 部署自 05-代码示例/
+├── utils/ # 部署自源码仓 utils/
│ ├── api_retry_util.py
│ ├── data_factory.py
│ ├── data_masking.py
@@ -43,7 +43,7 @@ project_root/
└── 测试用例/、测试数据/、执行日志/
```
-> 注:`conftest.py` 仅一份,位于项目根(部署来自 04-配置文件/conftest.py)。`workspace/自动化脚本/python/` 内**不再放 conftest.py**。
+> 注:`conftest.py` 仅一份,位于项目根(部署来自 config/conftest.py)。`workspace/自动化脚本/python/` 内**不再放 conftest.py**。
> import 路径:`from utils.api_retry_util import call_with_retry` 等;conftest 已注入 sys.path。
### 命名规范
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/07-\346\265\213\350\257\225\346\211\247\350\241\214.md" "b/agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/07-\346\265\213\350\257\225\346\211\247\350\241\214.md"
rename to "agents/07-\346\265\213\350\257\225\346\211\247\350\241\214.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/08-Bug\347\256\241\347\220\206.md" "b/agents/08-Bug\347\256\241\347\220\206.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/08-Bug\347\256\241\347\220\206.md"
rename to "agents/08-Bug\347\256\241\347\220\206.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/09-\346\212\245\345\221\212\347\224\237\346\210\220.md" "b/agents/09-\346\212\245\345\221\212\347\224\237\346\210\220.md"
similarity index 95%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/09-\346\212\245\345\221\212\347\224\237\346\210\220.md"
rename to "agents/09-\346\212\245\345\221\212\347\224\237\346\210\220.md"
index e673b65..1f744f7 100644
--- "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/09-\346\212\245\345\221\212\347\224\237\346\210\220.md"
+++ "b/agents/09-\346\212\245\345\221\212\347\224\237\346\210\220.md"
@@ -207,18 +207,22 @@ python -m utils.generate_report \
--notify
```
-## 日报触发(GitHub Actions schedule)
+## 定时触发(GitHub Actions schedule)
+
+参见 `.github/workflows/selftest-weekly.yml`(每周自检,可改为日报 cron):
```yaml
-# .github/workflows/daily-report.yml
+# 日报示例:放 .github/workflows/daily-report.yml
on:
schedule:
- - cron: '30 9 * * 1-5' # 每个工作日 17:30 UTC+8 = 9:30 UTC
+ - cron: '30 1 * * 1-5' # 每个工作日 09:30 UTC+8 = 01:30 UTC(GitHub Actions cron 按 UTC 计)
jobs:
daily:
runs-on: ubuntu-latest
steps:
- - run: python -m utils.generate_report --data ... --notify
+ - uses: actions/checkout@v6
+ - run: pip install -e runtime/
+ - run: tagent run --type smoke --notify
```
## 协作输出
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/10-\347\247\273\345\212\250\346\265\213\350\257\225.md" "b/agents/10-\347\247\273\345\212\250\346\265\213\350\257\225.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/10-\347\247\273\345\212\250\346\265\213\350\257\225.md"
rename to "agents/10-\347\247\273\345\212\250\346\265\213\350\257\225.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/11-\346\241\214\351\235\242\346\265\213\350\257\225.md" "b/agents/11-\346\241\214\351\235\242\346\265\213\350\257\225.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/11-\346\241\214\351\235\242\346\265\213\350\257\225.md"
rename to "agents/11-\346\241\214\351\235\242\346\265\213\350\257\225.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/12-\350\247\206\350\247\211\346\270\270\346\210\217\346\265\213\350\257\225.md" "b/agents/12-\350\247\206\350\247\211\346\270\270\346\210\217\346\265\213\350\257\225.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/12-\350\247\206\350\247\211\346\270\270\346\210\217\346\265\213\350\257\225.md"
rename to "agents/12-\350\247\206\350\247\211\346\270\270\346\210\217\346\265\213\350\257\225.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/13-\347\263\273\347\273\237\351\233\206\346\210\220\346\265\213\350\257\225.md" "b/agents/13-\347\263\273\347\273\237\351\233\206\346\210\220\346\265\213\350\257\225.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/13-\347\263\273\347\273\237\351\233\206\346\210\220\346\265\213\350\257\225.md"
rename to "agents/13-\347\263\273\347\273\237\351\233\206\346\210\220\346\265\213\350\257\225.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md" "b/agents/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md"
similarity index 88%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md"
rename to "agents/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md"
index f3896f8..ecce03f 100644
--- "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md"
+++ "b/agents/14-AI\346\250\241\345\236\213\346\265\213\350\257\225.md"
@@ -126,6 +126,8 @@ def test_inference_latency():
## 公平性测试
+### 快速检查(单敏感属性 + 准确率 gap)
+
```python
# ai/tests/test_fairness.py
from utils.ai_validator import fairness_metrics
@@ -142,6 +144,34 @@ def test_gender_fairness():
assert diff < 0.05, f"性别准确率差 {diff:.3f} 过大"
```
+### 完整偏见审计(6 指标 + 交叉分析)
+
+```python
+from fairness_auditor import (
+ audit_dataset_bias,
+ audit_model_fairness,
+ audit_intersectional,
+ export_bias_report,
+ summary,
+)
+
+# 数据集偏见检测
+r1 = audit_dataset_bias(y_true, sensitive, group_names=["male", "female"])
+print(summary(r1)) # CI 友好单行
+export_bias_report(r1) # → workspace/执行日志/ai-fairness/
+
+# 模型公平性(6 指标:DI / SPD / EO / 均衡几率 / 校准 / 预测对等)
+r2 = audit_model_fairness(y_true, y_pred, sensitive, group_names=["male", "female"])
+assert r2.overall_severity == "pass", f"Fairness FAIL: {summary(r2)}"
+
+# 交叉公平性(gender × race 等多敏感属性)
+r3 = audit_intersectional(y_true, y_pred, {
+ "gender": gender_arr,
+ "race": race_arr,
+})
+export_bias_report(r3)
+```
+
## LLM 应用测试
```python
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/15-\346\270\227\351\200\217\346\265\213\350\257\225.md" "b/agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/15-\346\270\227\351\200\217\346\265\213\350\257\225.md"
rename to "agents/15-\346\270\227\351\200\217\346\265\213\350\257\225.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/16-\350\275\246\350\275\275\346\265\213\350\257\225.md" "b/agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md"
similarity index 100%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/16-\350\275\246\350\275\275\346\265\213\350\257\225.md"
rename to "agents/16-\350\275\246\350\275\275\346\265\213\350\257\225.md"
diff --git "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/README.md" b/agents/README.md
similarity index 92%
rename from "02-\344\270\223\345\256\266\345\256\232\344\271\211/README.md"
rename to agents/README.md
index 3f8224f..92a3fb5 100644
--- "a/02-\344\270\223\345\256\266\345\256\232\344\271\211/README.md"
+++ b/agents/README.md
@@ -1,6 +1,6 @@
-# 02-专家定义 索引
+# agents 索引
-**16 个 Agent**,按域分三类:核心通用流程 9 + 平台专项扩展 5 + 垂直领域 2;按实装状态:**11 production + 5 script + 0 rollout**(详见根目录 [ROADMAP.md](../ROADMAP.md))。
+**16 个 Agent**,按域分三类:核心通用流程 9 (含 test-lead 协调者) + 平台专项扩展 5 + 垂直领域 2。test-lead 自身不计入被协调专家清单时核心为 8 位。按实装状态:**11 production + 5 script + 0 rollout**(详见根目录 [ROADMAP.md](../ROADMAP.md))。
顶层导航见 [00-项目导航.md](../00-项目导航.md)。
diff --git "a/06-CICD\351\233\206\346\210\220/CICD\351\233\206\346\210\220\350\257\264\346\230\216.md" "b/ci/CICD\351\233\206\346\210\220\350\257\264\346\230\216.md"
similarity index 100%
rename from "06-CICD\351\233\206\346\210\220/CICD\351\233\206\346\210\220\350\257\264\346\230\216.md"
rename to "ci/CICD\351\233\206\346\210\220\350\257\264\346\230\216.md"
diff --git "a/06-CICD\351\233\206\346\210\220/INDEX.md" b/ci/INDEX.md
similarity index 86%
rename from "06-CICD\351\233\206\346\210\220/INDEX.md"
rename to ci/INDEX.md
index d0324d9..8765a48 100644
--- "a/06-CICD\351\233\206\346\210\220/INDEX.md"
+++ b/ci/INDEX.md
@@ -1,4 +1,4 @@
-# 06-CICD集成 索引
+# ci 索引
> 顶层导航见根目录 `00-项目导航.md`;流水线配置详解见本目录 `CICD集成说明.md`。
@@ -18,10 +18,10 @@
|---------|------|-----|
| `.github/workflows/ci.yml` | **本仓库自身 CI**:Ruff / 模板自检 / 文件统计 / 敏感文件防护 / 链接校验 | GitHub Actions(本 repo) |
| `.github/workflows/codeql.yml` | **本仓库自身 CodeQL**:python + actions 安全扫描 | GitHub Actions(本 repo) |
-| `06-CICD集成/github-actions-test.yml` | **用户分发模板**:用户 fork/部署后跑业务测试 | 用户自己的 repo |
-| `06-CICD集成/jenkins-pipeline.groovy` | **用户分发模板**:Jenkins 流水线 | 用户自己的 Jenkins |
+| `ci/github-actions-test.yml` | **用户分发模板**:用户 fork/部署后跑业务测试 | 用户自己的 repo |
+| `ci/jenkins-pipeline.groovy` | **用户分发模板**:Jenkins 流水线 | 用户自己的 Jenkins |
-> install.sh 在部署时把 `06-CICD集成/github-actions-test.yml` 拷贝到 `/.github/workflows/test.yml`,把 `jenkins-pipeline.groovy` 拷贝到 `/Jenkinsfile`。
+> install.sh 在部署时把 `ci/github-actions-test.yml` 拷贝到 `/.github/workflows/test.yml`,把 `jenkins-pipeline.groovy` 拷贝到 `/Jenkinsfile`。
## 流水线总览(用户分发模板)
@@ -66,4 +66,4 @@
|------|--------|
| `github-actions-test.yml` 加 stage | `CICD集成说明.md` 流水线表 + `配置清单.md` Secrets 表 |
| `jenkins-pipeline.groovy` 加 credentials | `CICD集成说明.md` Jenkins Credentials 段 |
-| 门禁阈值变更 | `utils/ci_quality_gate.py::GATES` + `utils/jmeter_result_parser.py::DEFAULT_GATES_*` + `02-专家定义/01-测试主管.md::QUALITY_GATES` |
+| 门禁阈值变更 | `config/quality_gates.yaml`(canonical)+ `utils/ci_quality_gate.py::GATES` + `utils/jmeter_result_parser.py::DEFAULT_GATES_*` + `agents/01-测试主管.md::QUALITY_GATES` |
diff --git "a/06-CICD\351\233\206\346\210\220/github-actions-test.yml" b/ci/github-actions-test.yml
similarity index 100%
rename from "06-CICD\351\233\206\346\210\220/github-actions-test.yml"
rename to ci/github-actions-test.yml
diff --git "a/06-CICD\351\233\206\346\210\220/jenkins-pipeline.groovy" b/ci/jenkins-pipeline.groovy
similarity index 100%
rename from "06-CICD\351\233\206\346\210\220/jenkins-pipeline.groovy"
rename to ci/jenkins-pipeline.groovy
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/.env.example" b/config/.env.example
similarity index 84%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/.env.example"
rename to config/.env.example
index 50b2843..7b3f3a5 100644
--- "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/.env.example"
+++ b/config/.env.example
@@ -58,9 +58,13 @@ ZENTAO_PASSWORD=your_zentao_password
# 企业微信群机器人 webhook
WECHAT_WEBHOOK_URL=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=your-key
# 飞书自定义机器人 webhook
-FEISHU_WEBHOOK=https://open.feishu.cn/open-apis/bot/v2/hook/your-token
+FEISHU_WEBHOOK_URL=https://open.feishu.cn/open-apis/bot/v2/hook/your-token
# 钉钉自定义机器人 webhook
-DINGTALK_WEBHOOK=https://oapi.dingtalk.com/robot/send?access_token=your-token
+DINGTALK_WEBHOOK_URL=https://oapi.dingtalk.com/robot/send?access_token=your-token
+# Slack incoming webhook
+SLACK_WEBHOOK_URL=https://hooks.slack.com/services/your-webhook-url
+# Teams incoming webhook
+TEAMS_WEBHOOK_URL=https://your-org.webhook.office.com/webhookb2/...
# ===== 通知 - 应用消息(如需要使用 corp 应用消息而非群机器人,可填)=====
WECHAT_CORP_ID=
@@ -93,6 +97,27 @@ APP_SRC_PATH=./src
# ===== 日志级别: DEBUG | INFO | WARNING | ERROR =====
LOG_LEVEL=INFO
+# ===== LLM Provider(runtime 必需)=====
+# 主 provider:claude | openai | gemini | deepseek | qwen | zhipu | ollama | stub
+TAGENT_LLM_PROVIDER=stub
+# 备用 provider(主 provider 不可用时自动切换)
+# TAGENT_LLM_PROVIDER_FALLBACK=ollama
+# API key(按实际 provider 填写对应的)
+# ANTHROPIC_API_KEY=sk-ant-xxx
+# OPENAI_API_KEY=sk-xxx
+# GEMINI_API_KEY=xxx
+# DEEPSEEK_API_KEY=sk-xxx
+# DASHSCOPE_API_KEY=sk-xxx
+# 自定义 API base(用于 Zhipu / 代理 / 私有部署)
+# TAGENT_LLM_API_BASE=https://open.bigmodel.cn/api/paas/v4
+# TAGENT_LLM_API_KEY=xxx
+# 模型名(按 provider 选填)
+# TAGENT_LLM_MODEL=claude-sonnet-4-6
+# TAGENT_LLM_MODEL_FALLBACK=qwen2.5:7b
+# 超时和重试
+# TAGENT_LLM_TIMEOUT_SECONDS=60
+# TAGENT_LLM_MAX_RETRIES=2
+
# ============================================================
# 扩展平台测试(移动 / 桌面 / 视觉 / 系统集成 / AI)
# 按实际需要启用对应字段,无需的子集留空即可
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/.mcp.json" b/config/.mcp.json
similarity index 100%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/.mcp.json"
rename to config/.mcp.json
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/INDEX.md" b/config/INDEX.md
similarity index 95%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/INDEX.md"
rename to config/INDEX.md
index 315310b..2d14ca5 100644
--- "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/INDEX.md"
+++ b/config/INDEX.md
@@ -1,6 +1,6 @@
-# 04-配置文件 索引
+# config 索引
-> 顶层导航见根目录 `00-项目导航.md`;配置项详解见 `01-快速开始/配置清单.md`。
+> 顶层导航见根目录 `00-项目导航.md`;配置项详解见 `docs/getting-started/配置清单.md`。
## 文件清单
@@ -27,7 +27,7 @@
- `.env` 严禁提交 Git(默认已在 `.gitignore`)
- 真实凭据(`TEST_DB_PASSWORD` / `ZENTAO_PASSWORD` / `WECHAT_WEBHOOK_URL` 等)只放 `.env` 或 GitHub Secrets / Jenkins Credentials
-- 修改 `.env.example` 加新字段时,必须同步 `conftest.py::EnvConfig` 与 `01-快速开始/配置清单.md`
+- 修改 `.env.example` 加新字段时,必须同步 `conftest.py::EnvConfig` 与 `docs/getting-started/配置清单.md`
## 同步链路(宪章§1 同步铁律)
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/conftest.py" b/config/conftest.py
similarity index 95%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/conftest.py"
rename to config/conftest.py
index 52bc5c2..8e7b7ab 100644
--- "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/conftest.py"
+++ b/config/conftest.py
@@ -21,19 +21,35 @@
 # 注入 utils 包 + utils 内部模块 到 sys.path
 # 部署后: conftest.py 在 $PROJECT_ROOT/, utils 在 $PROJECT_ROOT/utils/
-# 源码仓: conftest.py 在 04-配置文件/, utils 在 ../05-代码示例/
+# 源码仓: conftest.py 在 config/, utils 在 ../utils/
 # 双场景都加 sys.path,确保 utils 平铺 import (e.g., `from api_retry_util import ...`) 工作
-_PROJECT_ROOT = Path(__file__).parent
+_CONFIG_DIR = Path(__file__).parent
+# Resolve the project root. A utils/ directory next to this file means the
+# deployed layout and wins outright; fall back to the parent (source-repo
+# layout) only when the parent actually holds utils/. Probing the parent first
+# would mis-detect a deployed project that merely has a sibling ../utils.
+if (_CONFIG_DIR / "utils").is_dir() or not (_CONFIG_DIR.parent / "utils").is_dir():
+    _PROJECT_ROOT = _CONFIG_DIR
+else:
+    _PROJECT_ROOT = _CONFIG_DIR.parent
+if str(_CONFIG_DIR) not in sys.path:
+    sys.path.insert(0, str(_CONFIG_DIR))
 if str(_PROJECT_ROOT) not in sys.path:
     sys.path.insert(0, str(_PROJECT_ROOT))
 _UTILS_CANDIDATES = [
     _PROJECT_ROOT / "utils", # 部署后路径
-    _PROJECT_ROOT.parent / "05-代码示例", # 源码仓路径
+    # No parent-level fallback: _PROJECT_ROOT already resolves to the repo root in
+    # the source tree, so "../utils" would be an unrelated directory outside it.
 ]
for _utils_dir in _UTILS_CANDIDATES:
if _utils_dir.is_dir() and str(_utils_dir) not in sys.path:
sys.path.insert(0, str(_utils_dir))
+ # utils 子目录也注入 — V1.42.0 重组后 utils/ 下 12 子目录
+ for _sub in _utils_dir.iterdir():
+ if _sub.is_dir() and not _sub.name.startswith(("_", ".")):
+ if str(_sub) not in sys.path:
+ sys.path.insert(0, str(_sub))
# ===== 环境配置 =====
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/llm-providers.md" b/config/llm-providers.md
similarity index 98%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/llm-providers.md"
rename to config/llm-providers.md
index 013f24d..46aecd4 100644
--- "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/llm-providers.md"
+++ b/config/llm-providers.md
@@ -11,7 +11,7 @@
1. 选厂商 (路径 A 内置 6 / 路径 B 兼容 5+)
2. 复制对应 export
3. `tagent demo` 验路由
-- **实测有效** (V1.32.5): Claude / OpenAI / Gemini / DeepSeek / Qwen / Ollama / **智谱 GLM** (路径 B 实测 20/20 准, 见 PR #79)
+- **实测有效** (V1.36.0): Claude / OpenAI / Gemini / DeepSeek / Qwen / Ollama / **智谱 GLM** (路径 B 实测 20/20 准, 见 PR #79)
- **适用场景**:
- 离线本地 = Ollama / Qwen
- 国内合规 = 智谱 / 豆包 / 通义
@@ -266,7 +266,7 @@ TAGENT_REAL_LLM=1 TAGENT_LLM_PROVIDER= = \
## 9 · 相关文档
-- 配置清单全字段: `01-快速开始/配置清单.md`
+- 配置清单全字段: `docs/getting-started/配置清单.md`
- LLM 客户端实现: `runtime/router/llm_client.py`
- 路由策略详情: `runtime/router/router.py`
- 测试基线: `runtime/tests/test_router_real.py`
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/mcp-server-impl.md" b/config/mcp-server-impl.md
similarity index 100%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/mcp-server-impl.md"
rename to config/mcp-server-impl.md
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/pytest.ini" b/config/pytest.ini
similarity index 100%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/pytest.ini"
rename to config/pytest.ini
diff --git a/config/quality_gates.yaml b/config/quality_gates.yaml
new file mode 100644
index 0000000..a766ce8
--- /dev/null
+++ b/config/quality_gates.yaml
@@ -0,0 +1,37 @@
+# Quality Gates Configuration (canonical — single source of truth)
+# 门禁引擎 quality_gate_engine.py 读取此文件。
+# 用户修改此文件即可调整门禁阈值,无需改代码。
+# 同步链: 此文件 → CI 文档(ci/CICD集成说明.md) → agent 01(agents/01-测试主管.md)
+# 修改后必须更新 CI 文档和 agent 01 中的对应数值。
+
+smoke:
+ min_pass_rate_pct: 95
+ p0_bug_count_max: 0
+ api_response_ms_max: 3000
+
+regression:
+ min_pass_rate_pct: 90
+ p0_pass_rate_pct: 100
+ p1_pass_rate_pct: 95
+ min_coverage_pct: 80
+ max_flaky_pct: 5
+ new_p0_bug_count_max: 0
+
+performance_ci_quick:
+ min_tps: 20
+ max_p95_ms: 800
+ max_avg_ms: 400
+ max_error_pct: 1.0
+
+performance_full:
+ min_tps: 100
+ max_p95_ms: 500
+ max_avg_ms: 200
+ max_error_pct: 1.0
+ max_baseline_regression_pct: 20
+
+release:
+ require_smoke: true
+ require_regression: true
+ require_perf_full: false
+ require_bug_review: true
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/requirements.txt" b/config/requirements.txt
similarity index 100%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/requirements.txt"
rename to config/requirements.txt
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/INDEX.md" b/config/templates/INDEX.md
similarity index 97%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/INDEX.md"
rename to config/templates/INDEX.md
index 4bf85d9..d5aa7af 100644
--- "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/INDEX.md"
+++ b/config/templates/INDEX.md
@@ -1,4 +1,4 @@
-# 配置模板库索引(V1.32.5)
+# 配置模板库索引(V1.36.0)
> `tagent init` 交互向导从本目录读取模板 + matrix · 5 分钟生成 `.env` + `tagent.yml` + `STARTUP.md`。
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/STARTUP.md.tpl" b/config/templates/STARTUP.md.tpl
similarity index 100%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/STARTUP.md.tpl"
rename to config/templates/STARTUP.md.tpl
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/base.env.tpl" b/config/templates/base.env.tpl
similarity index 85%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/base.env.tpl"
rename to config/templates/base.env.tpl
index ef6c9bd..8a69ae0 100644
--- "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/base.env.tpl"
+++ b/config/templates/base.env.tpl
@@ -23,7 +23,7 @@ TAGENT_LLM_PROVIDER_FALLBACK=ollama
# ===== Test-Agent 运行时(通常不需改) =====
TAGENT_OTEL_ENABLED=false
-TAGENT_DB_URL=postgresql+psycopg://tagent:tagent@localhost:5432/tagent
+TAGENT_DB_URL={{DB_URL}}
TAGENT_MINIO_ENDPOINT=localhost:9000
-TAGENT_MINIO_ACCESS_KEY=minioadmin
-TAGENT_MINIO_SECRET_KEY=minioadmin
+TAGENT_MINIO_ACCESS_KEY={{MINIO_ACCESS_KEY}}
+TAGENT_MINIO_SECRET_KEY={{MINIO_SECRET_KEY}}
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/base.tagent.yml.tpl" b/config/templates/base.tagent.yml.tpl
similarity index 100%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/base.tagent.yml.tpl"
rename to config/templates/base.tagent.yml.tpl
diff --git "a/04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/matrix.yaml" b/config/templates/matrix.yaml
similarity index 100%
rename from "04-\351\205\215\347\275\256\346\226\207\344\273\266/templates/matrix.yaml"
rename to config/templates/matrix.yaml
diff --git a/desktop/electron/main.ts b/desktop/electron/main.ts
index b04067c..71d8c0f 100644
--- a/desktop/electron/main.ts
+++ b/desktop/electron/main.ts
@@ -24,14 +24,8 @@ function getBackendPath(): string {
function startBackend(): Promise {
return new Promise((resolve, reject) => {
const isDev = !app.isPackaged;
- const cmd = isDev
- ? "python"
- : getBackendPath();
- const args = isDev
- ? ["-m", "runtime.cli.main", "run", "--help"] // dev: use CLI
- : [];
- // In dev mode, start uvicorn directly
+ // In dev mode, start uvicorn directly via python -c
const devArgs = isDev
? ["-c", `import uvicorn; uvicorn.run('runtime.api.main:app',host='127.0.0.1',port=${BACKEND_PORT})`]
: [];
@@ -141,7 +135,14 @@ function createWindow(): void {
});
mainWindow.webContents.setWindowOpenHandler(({ url }) => {
- shell.openExternal(url);
+ try {
+ const parsed = new URL(url);
+ if (parsed.protocol === "https:" || parsed.protocol === "http:") {
+ shell.openExternal(url);
+ }
+ } catch {
+ // Invalid URL — silently ignore
+ }
return { action: "deny" };
});
}
diff --git a/desktop/electron/preload.ts b/desktop/electron/preload.ts
index 61f60c5..30d4c4e 100644
--- a/desktop/electron/preload.ts
+++ b/desktop/electron/preload.ts
@@ -2,7 +2,7 @@ import { contextBridge, ipcRenderer } from "electron";
contextBridge.exposeInMainWorld("electronAPI", {
getBackendPort: () => 8800,
- getAppVersion: () => "1.34.0",
+ getAppVersion: () => "1.42.0",
platform: process.platform,
isElectron: true,
});
diff --git a/desktop/electron/preload_extended.ts b/desktop/electron/preload_extended.ts
index 557c886..033e004 100644
--- a/desktop/electron/preload_extended.ts
+++ b/desktop/electron/preload_extended.ts
@@ -53,7 +53,7 @@ contextBridge.exposeInMainWorld("tagendAPI", {
// ── Metadata ──
getBackendPort: () => 8800,
- getAppVersion: () => "1.33.0",
+ getAppVersion: () => "1.42.0",
platform: process.platform,
isElectron: true,
});
diff --git a/desktop/package.json b/desktop/package.json
index 28a3a40..4a50079 100644
--- a/desktop/package.json
+++ b/desktop/package.json
@@ -1,6 +1,6 @@
{
"name": "test-agent-desktop",
- "version": "1.32.0",
+ "version": "1.42.0",
"description": "Test-Agent Desktop — AI Testing Framework GUI",
"author": "Test-Agent Team",
"license": "MIT",
diff --git a/desktop/pyinstaller/tagent_backend.spec b/desktop/pyinstaller/tagent_backend.spec
index a36b0e0..d1564b7 100644
--- a/desktop/pyinstaller/tagent_backend.spec
+++ b/desktop/pyinstaller/tagent_backend.spec
@@ -13,10 +13,10 @@ a = Analysis(
pathex=[str(PROJECT_ROOT), str(RUNTIME)],
binaries=[],
datas=[
- (str(PROJECT_ROOT / "02-专家定义"), "02-专家定义"),
- (str(PROJECT_ROOT / "03-技能定义"), "03-技能定义"),
- (str(PROJECT_ROOT / "05-代码示例"), "05-代码示例"),
- (str(PROJECT_ROOT / "04-配置文件"), "04-配置文件"),
+ (str(PROJECT_ROOT / "agents"), "agents"),
+ (str(PROJECT_ROOT / "skills"), "skills"),
+ (str(PROJECT_ROOT / "config"), "config"),
+ (str(PROJECT_ROOT / "utils"), "utils"),
(str(PROJECT_ROOT / "VERSION"), "."),
],
hiddenimports=[
diff --git a/desktop/scripts/build-all.sh b/desktop/scripts/build-all.sh
index db1408e..86950ac 100644
--- a/desktop/scripts/build-all.sh
+++ b/desktop/scripts/build-all.sh
@@ -5,7 +5,12 @@ cd "$(dirname "$0")/.."
echo "=== Building Python backend ==="
bash scripts/build-python.sh
echo "=== Building Web UI ==="
-cd ../runtime/web && npm ci && npm run build && cd -
+WEB_DIR="../runtime/web"
+if [ -f "$WEB_DIR/package.json" ]; then
+ (cd "$WEB_DIR" && npm ci && npm run build)
+else
+ echo " (skip: $WEB_DIR/package.json not found)"
+fi
echo "=== Building Electron ==="
npm ci
npm run build:electron
diff --git a/desktop/scripts/build-python.sh b/desktop/scripts/build-python.sh
index 089649a..390bd92 100644
--- a/desktop/scripts/build-python.sh
+++ b/desktop/scripts/build-python.sh
@@ -3,6 +3,6 @@
set -euo pipefail
cd "$(dirname "$0")/.."
echo "Building Python backend..."
-pip install pyinstaller -q
+pip install pyinstaller || { echo "PyInstaller install failed"; exit 1; }
pyinstaller --clean --noconfirm pyinstaller/tagent_backend.spec
echo "Backend built: dist-python/tagent-backend"
diff --git a/docs/INDEX.md b/docs/INDEX.md
index b300042..b681a50 100644
--- a/docs/INDEX.md
+++ b/docs/INDEX.md
@@ -1,4 +1,4 @@
-# docs/ 索引(V1.10.0)
+# docs/ 索引(V1.42.0)
> 项目文档总入口 · 样式规范 / 教学理论 KB / 演示素材 / 用户调研 · 几分钟即可定位。
diff --git a/docs/MASTER_PLAN.md b/docs/MASTER_PLAN.md
index a4a5f1d..9f4d5cf 100644
--- a/docs/MASTER_PLAN.md
+++ b/docs/MASTER_PLAN.md
@@ -44,7 +44,7 @@
- **文件**: `.pre-commit-config.yaml` — `default_stages: [pre-commit]`
- **文件**: `install.sh` — 加安全建议 (推荐 `git clone` over `curl|bash`)
-### #5 `05-代码示例/` 安全杂项
+### #5 `utils/` 安全杂项
- `chaos_helper.py` — psutil absent → `RuntimeError` (not silent fallthrough)
- `protocol_helper.py` — SOAP XML `xml.sax.saxutils.escape()`
- `miniprogram_runner.py` — WebSocket `close()` wrap `try/finally`
@@ -63,10 +63,10 @@
- **文件**: `README.md` + `README.zh-CN.md` — "8640 combinations" → "~12 common combinations tested in CI"; "95% aspirational" → "Coverage is broad but not exhaustive"
- **文件**: `00-项目导航.md` — 移除 9 处 "主宪章 §X" 引用
- **文件**: `ROADMAP.md` — 移除 3 处 "主宪章" 引用
-- **文件**: `05-代码示例/generate_report.py` — `generate_test_report()` 143→30 行, 提取 6 helper
-- **文件**: `05-代码示例/mobile_driver.py` — `run_monkey()` 107→55 行, 提取 2 helper
+- **文件**: `utils/generate_report.py` — `generate_test_report()` 143→30 行, 提取 6 helper
+- **文件**: `utils/mobile_driver.py` — `run_monkey()` 107→55 行, 提取 2 helper
- **文件**: `runtime/router/llm_client.py` — `_stub_response()` 77 行 if/elif → dispatch table 8 条目
-- **文件**: `05-代码示例/fuzzer.py` — `ALL_PAYLOADS` 提升到模块级
+- **文件**: `utils/fuzzer.py` — `ALL_PAYLOADS` 提升到模块级
### #8 CLI 拆分 + 冒烟测试
- **CLI 拆分** (680→39 行 main.py):
@@ -103,8 +103,8 @@
- **修改**: `flows.py` + `direct.py` — skipped 独立追踪, 不计入 failures
### #12 共享 fixture 阻塞并行 ✅
-- **修改**: `04-配置文件/conftest.py:106` — `test_data` session→function + `tmp_path`
-- **修改**: `04-配置文件/conftest.py:150` — `browser_context` session→function
+- **修改**: `config/conftest.py:106` — `test_data` session→function + `tmp_path`
+- **修改**: `config/conftest.py:150` — `browser_context` session→function
### #13 DAG 执行进度 + 断路器 ✅
- **修改**: `flows.py` + `direct.py` — `MAX_FAILURES=3` 断路器 + 进度日志
@@ -127,7 +127,7 @@
- **不改**: `test_lead.py` — 独立使用,不破坏现有逻辑
### #16 Flaky 测试自动隔离 ✅
-- **修改**: `05-代码示例/flaky_detector.py` — 加 3 方法
+- **修改**: `utils/flaky_detector.py` — 加 3 方法
- `detect_trends()` — P-F-P / F-P-F 模式检测 + confidence scoring
- `generate_quarantine()` — 隔离清单 (pytest --deselect 兼容)
- `generate_pytest_markers()` — @pytest.mark.flaky 配置生成
@@ -139,7 +139,7 @@
- **不改**: `regression_scope.py` — 独立工具
### #18 需求可追溯性 ✅
-- **新建**: `05-代码示例/traceability_matrix.py` — 双向追溯矩阵
+- **新建**: `utils/traceability_matrix.py` — 双向追溯矩阵
- `TraceabilityMatrix` 类 — 需求↔用例↔缺陷 自动链接
- `to_markdown()` — markdown 表格导出
- 覆盖率统计 + 未覆盖需求 + 孤儿 bug 检测
@@ -222,7 +222,7 @@
- RiskItem (概率×影响=暴露) + RiskMatrix (summary/markdown export)
### #33 分类树方法(ISTQB 第10项)✅
-- `05-代码示例/classification_tree.py` — TreeModel + pairwise 组合生成 + 约束支持
+- `utils/classification_tree.py` — TreeModel + pairwise 组合生成 + 约束支持
---
@@ -233,7 +233,7 @@
- 支持第三方包注册 agents/skills/backends (group=`tagent`)
### #35 测试数据合成引擎 ✅
-- `05-代码示例/data_synthesizer.py` — PII 自动检测 + 确定性地掩码
+- `utils/data_synthesizer.py` — PII 自动检测 + 确定性掩码
- `mask_pii()` — 邮件/手机/身份证/IP/信用卡 5 类检测
- `synthesize_from_json()` — 递归 walk + 掩码 + 写入
- `subset_json()` — 随机子集提取
diff --git a/docs/PHASE3_IMPLEMENTATION.md b/docs/PHASE3_IMPLEMENTATION.md
index 40aa3a2..e1153ab 100644
--- a/docs/PHASE3_IMPLEMENTATION.md
+++ b/docs/PHASE3_IMPLEMENTATION.md
@@ -230,7 +230,7 @@ if len(failures) >= MAX_FAILURES:
| 9 | 3 文件 | 2 文件 | +120, ~10 | 低 | ✅ done |
| 10 | 0 | 1 文件(direct.py) | ~30 | 中 | ✅ done |
| 11 | 0 | 3 文件(tasks/flows/direct) | ~20 | 低 | ✅ done |
-| 12 | 0 | 1 文件(04-配置文件/conftest.py) | ~5 | 低 | ✅ done |
+| 12 | 0 | 1 文件(config/conftest.py) | ~5 | 低 | ✅ done |
| 13 | 0 | 3 文件(flows/direct/tasks) | ~25 | 中 | ✅ done |
| **合计** | **3** | **6 (实际7)** | **~210** | | **5/5 done** |
@@ -255,7 +255,7 @@ if len(failures) >= MAX_FAILURES:
## 实施记录 (2026-05-17)
-**#12** `04-配置文件/conftest.py`: `test_data` scope=session→function + tmp_path, `browser_context` scope=session→function. 消除并行文件冲突.
+**#12** `config/conftest.py`: `test_data` scope=session→function + tmp_path, `browser_context` scope=session→function. 消除并行文件冲突.
**#11** `tasks.py` + `flows.py` + `direct.py`: on_failure=skip 节点设 summary.skipped=True, 不计入 failures. skipped 独立追踪.
**#9** 新建 `runtime/self_healing/` (retry.py + locator_store.py + __init__.py). `scripts.py` subprocess.run 外包 with_retry. `direct.py` _run_node execute_node 外包 with_retry. 指数退避 3 次重试.
**#10** `direct.py` 阻塞路径 + done_now 路径: 异常时 resubmit _run_node 最多 2 次, 指数退避 2^attempt 秒.
diff --git a/docs/STYLE.md b/docs/STYLE.md
index 6b233b0..2bc183b 100644
--- a/docs/STYLE.md
+++ b/docs/STYLE.md
@@ -87,7 +87,7 @@
| 场景 | 例外 |
|------|------|
-| 上游引入文件 | `03-技能定义/(darwin-skill\|karpathy-guidelines)/*` 沿用上游样式,不批改 |
+| 上游引入文件 | `skills/(darwin-skill\|karpathy-guidelines)/*` 沿用上游样式,不批改 |
| 自动生成文件 | `CHANGELOG.md` 由 Keep-a-Changelog 模板驱动 |
| 本地笔记 | 项目根 gitignored 文件不受本约束 |
diff --git "a/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md" "b/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md"
index 70f079a..703d614 100644
--- "a/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md"
+++ "b/docs/case-studies/2026-05-15-\350\257\232\345\256\236\345\214\226\344\270\216\351\230\262mock\351\227\255\347\216\257.md"
@@ -23,11 +23,11 @@ V1.14.0 起步时, README / ROADMAP 与代码实质之间存在 3 类 gap:
### Phase 1: agent / skill frontmatter labeling (PR #63 / #64 / #65)
-为每个 `02-专家定义/*.md` (16 个) 加 `EXPERT_IMPL_STATUS:` frontmatter, 为每个 `03-技能定义/*.md` (32 个) 加 `SKILL_IMPL_STATUS:` frontmatter。合法值:
+为每个 `agents/*.md` (16 个) 加 `EXPERT_IMPL_STATUS:` frontmatter, 为每个 `skills/*.md` (32 个) 加 `SKILL_IMPL_STATUS:` frontmatter。合法值:
```
production ← 真 LLM-driven runner (orchestrator/agents/*.py) 已实装
-script ← 真 script-backed (05-代码示例/*.py) 已实装
+script ← 真 script-backed (utils/*.py) 已实装
rollout ← V1.x rollout 待实装
vision ← V2.x 方法论参考 (仅 skill 有此状态)
```
@@ -42,7 +42,7 @@ PR #65 自纠错: 初版 4 个 agent 标注与 runtime/orchestrator/agents/ 实
|---|---|---|
| `README.md` L44 | "16 expert agents" | "16 expert agents (5 production + 5 script + 6 rollout — see ROADMAP.md)" |
| `README.md` L45 | "33 business skills + 3 meta-skills" | "32 business skills (7 production + 7 script + 16 rollout + 2 vision) + 3 meta-skills" |
-| `02-专家定义/README.md` L47 | "类别 3:垂直领域 2 Agent (V1.x rollout)" | "类别 3:垂直领域扩展 2 Agent" + L54 显式列 6 rollout 全集 (跨三类别) |
+| `agents/README.md` L47 | "类别 3:垂直领域 2 Agent (V1.x rollout)" | "类别 3:垂直领域扩展 2 Agent" + L54 显式列 6 rollout 全集 (跨三类别) |
| `ROADMAP.md` | 仅 6 expert rollout 节 | 加「当前活跃 14 skill」+「V1.x rollout 16 skill」+「V2.x vision 2 skill」三节 |
**关键诚实点**: 旧 "33 skill" 是真错 (实 32, 3 meta 单列), PR #66 一并修。
@@ -84,7 +84,7 @@ X4 真正核心 = **skill 防 mock + 单源化**, 不是单纯 router 过滤。
X4 验时发现 pre-existing baseline 坏测试 `test_router_ends_with_report_generator`。根因: V1.14 把 `test-lead` 加到 stub 的 web-system path 末 (按主宪章 §40 "测试主管 — 协调 + 最终上线建议"), 但 mobile-app / desktop-app / ai-model / rest-api 4 path 未同步。
-修法: 5 path 末统一 `test-lead` 决策 (与 `02-专家定义/README.md` L20-31 流程对齐):
+修法: 5 path 末统一 `test-lead` 决策 (与 `agents/README.md` L20-31 流程对齐):
```
bug-manager → report-generator → test-lead 决策
@@ -100,7 +100,7 @@ bug-manager → report-generator → test-lead 决策
|---|---|---|
| Agent frontmatter | 16 | EXPERT_IMPL_STATUS 全标 |
| Skill frontmatter | 32 | SKILL_IMPL_STATUS 全标 |
-| 文档诚实化 | 4 | README × 2 + 02-专家定义/README + ROADMAP |
+| 文档诚实化 | 4 | README × 2 + agents/README + ROADMAP |
| Runtime 代码 | 4 | registry / experts / router / llm_client |
| 测试 | 2 | test_impl_status_filter.py (新 13 cases) + test_router.py (修) |
@@ -136,7 +136,7 @@ f6 假阳性 3 问全否后撤项。
### 决策 4: stub 5 path 统一 test-lead (而非容错测试)
-`test_router_ends_with_report_generator` 旧测试错, 不是 stub 错。改修 stub (V1.14 漏改 4 path 同步) + rename 测试, 与 `02-专家定义/README.md` 流程对齐。
+`test_router_ends_with_report_generator` 旧测试错, 不是 stub 错。改修 stub (V1.14 漏改 4 path 同步) + rename 测试, 与 `agents/README.md` 流程对齐。
## 5. 教训 / 复用 pattern
diff --git a/docs/charter/01-vision-dimensions.md b/docs/charter/01-vision-dimensions.md
index 4801609..747961c 100644
--- a/docs/charter/01-vision-dimensions.md
+++ b/docs/charter/01-vision-dimensions.md
@@ -17,16 +17,16 @@
| 根目录 | README.md | 简明入口(≤ 200 行) | 所有用户 |
| **根目录** | **00-项目导航.md** | **按职责分类速查(通用流程 / 平台专项 / 协议 / 输入 / CI)** | **所有用户** |
| **根目录** | **FULL_GUIDE.md(本文档)** | **永久宪章 + 完整指南** | **所有用户** |
-| `01-快速开始/` | 使用手册.md | 快速上手指南 + FAQ | 所有用户 |
-| `01-快速开始/` | 部署说明.md | 跨平台部署(Win/Mac/Linux 含 Java/JMeter/Allure) | 运维/测试 |
-| `01-快速开始/` | 配置清单.md | 一站式配置文档(.env 全字段 + Secrets + Webhook 申请) | 所有用户 |
-| `01-快速开始/` | 交付物清单.md | 测试计划 / 测试报告 / Bug 等对外提交物落地位置与责任 | 所有用户 |
-| `02-专家定义/` | 16 个 .md(9 核心 + 5 平台 + 2 垂直) + README 索引 | Agent 定义文件 | 开发人员 |
-| `03-技能定义/` | 32 个 Skill 文件(业务 skill) + 3 个元 Skill 子目录 + README 索引 | 可复用测试技能 | 开发人员 |
-| `04-配置文件/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 |
-| `04-配置文件/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 |
-| `05-代码示例/` | utils(49 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 |
-| `06-CICD集成/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps |
+| `docs/getting-started/` | 使用手册.md | 快速上手指南 + FAQ | 所有用户 |
+| `docs/getting-started/` | 部署说明.md | 跨平台部署(Win/Mac/Linux 含 Java/JMeter/Allure) | 运维/测试 |
+| `docs/getting-started/` | 配置清单.md | 一站式配置文档(.env 全字段 + Secrets + Webhook 申请) | 所有用户 |
+| `docs/getting-started/` | 交付物清单.md | 测试计划 / 测试报告 / Bug 等对外提交物落地位置与责任 | 所有用户 |
+| `agents/` | 16 个 .md(9 核心 + 5 平台 + 2 垂直) + README 索引 | Agent 定义文件 | 开发人员 |
+| `skills/` | 32 个 Skill 文件(业务 skill) + 3 个元 Skill 子目录 + README 索引 | 可复用测试技能 | 开发人员 |
+| `config/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 |
+| `config/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 |
+| `utils/` | utils(78 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 |
+| `ci/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps |
---
@@ -110,17 +110,20 @@
| 决策回放器 | 任一判断可复现、可反驳 | 工程层 | `workspace/执行日志/decisions/` + tracing | ✅ |
| 数字考古学家 | 追溯遗留系统初始假设 | 文明层 | Phase 4 知识图谱冷启动 | ❌ |
| 缓慢暴力监测器 | 跨发布周期跟踪代际效应 | 文明层 | 需多年数据积累,Phase 4 | ❌ |
-| 缺席者画像生成器 | 强制注入边缘用户场景 | 文明/权力层 | a11y_scanner + i18n_checker + 边缘剧本库 | ⚪ |
+| 缺席者画像生成器 | 强制注入边缘用户场景 | 文明/权力层 | absentee_scenario_injector.py (9组场景) | ✅ |
| 现实缝合力探针 | 测试平台对半真半假内容的免疫 | 社会权力层 | ai_adversarial 扩展 | ⚪ |
+| 公平性审计器 | 数据集/模型/决策公平性指标 (DI/EO/校准/交叉) | 社会权力层 | fairness_auditor.py | ✅ |
+| 沉默故障探测器 | 无报警漂移检测/趋势分析/多源聚合 | 工程层 | silent_failure_detector.py | ✅ |
+| 缺席者场景注入器 | 9组边缘场景(残障/老年/未成年/离线/危机/非母语)剧本库+章节生成 | 文明/权力层 | absentee_scenario_injector.py | ✅ |
| 末日哨兵 | 计算"这一次就是那一次"概率 | 文明层 | 需监管/学界共识授权,Phase 4 | ❌ |
-| 神圣性守护器 | 识别宗教/纪念场景的不可亵渎边界 | 簇 9 | i18n_checker 禁忌矩阵扩展 | ❌ |
+| 神圣性守护器 | 识别宗教/纪念场景的不可亵渎边界 | 簇 9 | i18n_checker + taboo_matrix 禁忌矩阵 | ✅ |
| 精神危机响应器 | 模拟危机状态用户、验证交接路径 | 簇 9 | 缺席者剧本库子集 | ❌ |
| 踩踏推演器 | 群体情绪与系统反馈的正反馈回路 | 簇 9 | chaos_helper 扩展 | ❌ |
-| 司法证据包生成器 | 决策链、模型版本、数据集打包 | 簇 9 | dora_metrics + decisions/ 打包脚本 | ⚪ |
+| 司法证据包生成器 | 决策链、模型版本、数据集打包 | 簇 9 | evidence_chain.py + dora_metrics + decisions/ 打包脚本 | ✅ |
| 禁忌矩阵 | 跨文化禁忌词/色/数/节日组合 | 簇 9 | i18n_checker 本地化共建 | ❌ |
-| Bug 多适配引擎 | 5 套 tracker 切换 | 工程层 | `utils/bug_tracker_*.py` | ✅ |
+| Bug 多适配引擎 | 5 套 tracker 切换 | 工程层 | `utils/bug_tracker_base.py` + `zentao_bug_manager.py` + `jira_bug_manager.py` + `github_bug_manager.py` + `linear_bug_manager.py` + `webhook_bug_manager.py` | ✅ |
| AgentChat 协调器 | 讨论触发 / 中枢路由 / 反问留档 | 工程层 | test-lead + `discussions/` | ✅ |
-| 按需安装引擎 | 6 层依赖 + 运行时补装 | 工程层 | `install.sh` + frontmatter requires_layer | ✅ |
+| 按需安装引擎 | 6 层依赖 + 运行时补装 | 工程层 | `requirements/` (base/mobile/desktop/visual/system/ai/perf 七文件) + `install.sh` | ✅ |
| darwin-skill 自进化 | skill 文本结构棘轮优化 | 工程/元层 | `.claude/skills/darwin-skill/` | ✅ |
---
@@ -164,7 +167,7 @@
**探索 + 元工具 4 个**:`build-your-own-x-explorer` + `karpathy-guidelines` + `darwin-skill` + `nuwa-skill`
-> 完整 32 业务 Skill + 3 元 Skill 清单见 [ROADMAP.md](../../ROADMAP.md) 与 [03-技能定义/README.md](../../03-技能定义/README.md)。
+> 完整 32 业务 Skill + 3 元 Skill 清单见 [ROADMAP.md](../../ROADMAP.md) 与 [skills/README.md](../../skills/README.md)。
### 工程级质量门禁(分层)
diff --git a/docs/charter/02-coverage-matrix.md b/docs/charter/02-coverage-matrix.md
index bfcb16d..c316e2a 100644
--- a/docs/charter/02-coverage-matrix.md
+++ b/docs/charter/02-coverage-matrix.md
@@ -54,14 +54,14 @@
| AI 对抗 / LLM 越狱 / Prompt Injection | ai_adversarial | ai-tester | ✅ |
| 变异测试(用例有效性) | mutation_runner | testcase-designer | ✅ |
| DORA 4 指标 + 缺陷密度 + 套件减重 | dora_metrics + suite_minimizer | bug-manager | ✅ |
-| 伦理 / 偏见审计(数据集/模型/决策公平性) | ai_adversarial + suite_minimizer(覆盖偏差) + 公平性指标 | ai-tester | ⚪ Phase 3 |
-| 沉默故障检测(无报警的恶化) | tracing_validator + web_vitals_collector + 阈值漂移检测 | test-executor | ⚪ Phase 3 |
+| 伦理 / 偏见审计(数据集/模型/决策公平性) | fairness_auditor.py + ai_adversarial + suite_minimizer(覆盖偏差) + 公平性指标 | ai-tester | ✅ |
+| 沉默故障检测(无报警的恶化) | silent_failure_detector.py + tracing_validator + web_vitals_collector + 阈值漂移检测 | test-executor | ✅ |
| 决策可回放(任一判断可复现可反驳) | tracing_validator + history 归档 + 模型版本快照 | test-lead | ✅ |
-| 缺席者场景注入(残障/老年/未成年/未联网/精神危机) | a11y_scanner + i18n_checker + 边缘场景剧本库 | testcase-designer | ⚪ Phase 3 |
-| 证据链可采信性(司法/审计/监管送审) | dora_metrics + tracing_validator + 决策日志打包 | bug-manager | ⚪ Phase 4 |
-| 神圣性与跨文化禁忌边界(宗教/葬礼/儿童/纪念) | i18n_checker + 禁忌词/色/数/节日组合(本地化共建) | testcase-designer | ⚪ Phase 5 |
+| 缺席者场景注入(残障/老年/未成年/未联网/精神危机) | absentee_scenario_injector.py + a11y_scanner + i18n_checker + 边缘场景剧本库 | testcase-designer | ✅ |
+| 证据链可采信性(司法/审计/监管送审) | evidence_chain.py + dora_metrics + tracing_validator + 决策日志打包 | bug-manager | ✅ |
+| 神圣性与跨文化禁忌边界(宗教/葬礼/儿童/纪念) | i18n_checker + taboo_matrix + 禁忌词/色/数/节日组合(本地化共建) | testcase-designer | ✅ |
| Skill 自进化(darwin-skill 双重评估 + 棘轮) | darwin-skill SKILL.md + results.tsv + 子 agent 实测 | test-lead 触发 | ✅ |
-| Bug 工具多适配(禅道/Jira/GitHub/Linear/Webhook) | bug_tracker_base + 5 adapter | bug-manager | ✅ |
+| Bug 工具多适配(5 套 tracker 全部实装) | bug_tracker_base + zentao/jira/github/linear/webhook_bug_manager | bug-manager | ✅ |
| Agent 协作纪要(讨论/反问/通信落档) | agentchat_recorder + workspace/执行日志/discussions/ | test-lead | ✅ |
### 矩阵 C:用例设计方法(ISTQB 经典)
@@ -97,8 +97,8 @@
/单元(70%)/ ← pytest + pytest-mock,秒级反馈
```
-**总覆盖率 ~95%**(含闭环:Bug 多适配 + 多端通知 + CI/CD GitHub Actions/Jenkins + Dependabot)
+**总覆盖率 ~90%**(含闭环:Bug 多适配 + 多端通知 + CI/CD GitHub Actions/Jenkins + Dependabot)
-剩 ~5% 为高度专业合规领域(HIPAA 医疗 / SOC2 金融 / DO-178C 航空 / IEC61508 工业控制)—— 业务方按需自加。
+剩 ~10% 为高度专业合规领域(HIPAA 医疗 / SOC2 金融 / DO-178C 航空 / IEC61508 工业控制)—— 业务方按需自加。
---
diff --git a/docs/charter/04-skills-bugtracker.md b/docs/charter/04-skills-bugtracker.md
index a235ca5..cbb819d 100644
--- a/docs/charter/04-skills-bugtracker.md
+++ b/docs/charter/04-skills-bugtracker.md
@@ -70,10 +70,10 @@ V1.0.0 darwin-skill **不消费**项目运行数据(`discussions/` / `decision
| 适配器 | 状态 | 配置字段 | severity 映射 |
|--------|------|---------|--------------|
| **zentao**(默认) | ✅ V1.0.0 | `ZENTAO_URL / ZENTAO_USER / ZENTAO_TOKEN` | severity 1=P0 / 2=P1 / 3=P2 / 4=P3 |
-| **jira** | ✅ V1.0.0 | `JIRA_URL / JIRA_USER / JIRA_TOKEN / JIRA_PROJECT_KEY` | Highest=P0 / High=P1 / Medium=P2 / Low=P3 |
-| **github** | ✅ V1.0.0 | `GITHUB_TOKEN / GITHUB_REPO` | label `priority:p0..p3` |
-| **linear** | ✅ V1.0.0 | `LINEAR_API_KEY / LINEAR_TEAM_ID` | priority 1=P0 / 2=P1 / 3=P2 / 4=P3 |
-| **webhook** | ✅ V1.0.0 | `BUG_WEBHOOK_URL`(POST JSON) | 调用方自定义 |
+| **jira** | ⚪ Phase 2 | `JIRA_URL / JIRA_USER / JIRA_TOKEN / JIRA_PROJECT_KEY` | Highest=P0 / High=P1 / Medium=P2 / Low=P3 |
+| **github** | ⚪ Phase 2 | `GITHUB_TOKEN / GITHUB_REPO` | label `priority:p0..p3` |
+| **linear** | ⚪ Phase 2 | `LINEAR_API_KEY / LINEAR_TEAM_ID` | priority 1=P0 / 2=P1 / 3=P2 / 4=P3 |
+| **webhook** | ⚪ Phase 2 | `BUG_WEBHOOK_URL`(POST JSON) | 调用方自定义 |
### 2. 切换方式
diff --git a/docs/charter/05-install-deploy.md b/docs/charter/05-install-deploy.md
index be8c37c..be1bf22 100644
--- a/docs/charter/05-install-deploy.md
+++ b/docs/charter/05-install-deploy.md
@@ -8,7 +8,9 @@
> install.sh 不再一次性装全。**用户选了什么形态,才装什么依赖**——避免 mobile 用户被强装 desktop 工具,反之亦然。
-### 1. 依赖六层划分
+### 1. 依赖六层划分(Phase 2 规划)
+
+> **当前状态**:`install.sh` 通过 `pip install -r requirements.txt` 统一安装。分层按需安装(按产品形态选择性装依赖)为 Phase 2 路线图项。`requirements/` 目录含规划文档。
| 层 | requirements 文件 | 触发条件 | 关键包 |
|----|----------------|---------|--------|
@@ -152,7 +154,7 @@ git clone https://github.com/Wool-xing/Test-Agent.git
bash Test-Agent/install.sh /path/to/your-test-project
```
-> 默认仓库为 `Wool-xing/Test-Agent`。fork 后将路径替换为你自己用户名(或用 `TEST_AGENT_REPO_URL` 环境变量覆盖)。Windows / 手动方式见 `01-快速开始/部署说明.md`。
+> 默认仓库为 `Wool-xing/Test-Agent`。fork 后将路径替换为你自己用户名(或用 `TEST_AGENT_REPO_URL` 环境变量覆盖)。Windows / 手动方式见 `docs/getting-started/部署说明.md`。
`install.sh` 自动完成:克隆模板 → 装 Claude Code → 建目录 → 拷贝全部文件 → 装 Python 依赖 + Playwright。
@@ -193,7 +195,7 @@ claude
> 注:`>` 后面是 Claude Code 提示符的输入(斜杠技能或自然语言),**不是 shell 命令**。
-详细启动指引(含 Java/JMeter/Allure 安装、.env 必填、首次跑通验证)→ `01-快速开始/使用手册.md` 顶部「🚀 启动指引」章节。
+详细启动指引(含 Java/JMeter/Allure 安装、.env 必填、首次跑通验证)→ `docs/getting-started/使用手册.md` 顶部「🚀 启动指引」章节。
---
@@ -246,7 +248,7 @@ your-test-project/
├── .claude/{agents,skills}/ ← 16 agent + 32 skill(业务) + 3 元 skill
├── .github/workflows/test.yml
├── Jenkinsfile
-├── utils/ ← 49 个 .py + __init__
+├── utils/ ← 78 个 .py + __init__
├── src/ ← 被测系统源码(cov 指向)
├── workspace/
│ ├── 测试计划/ 需求分析/ 测试用例/ 测试数据/
@@ -270,7 +272,7 @@ your-test-project/
## 🛠️ 升级 / 回滚 / 卸载
-详见 `01-快速开始/部署说明.md` "升级 / 回滚 / 卸载" 章节。
+详见 `docs/getting-started/部署说明.md` "升级 / 回滚 / 卸载" 章节。
升级会覆盖:`.claude/agents/`、`.claude/skills/`、`utils/`、`conftest.py`、`pytest.ini`、`requirements.txt`、`.mcp.json`、`.github/workflows/test.yml`、`Jenkinsfile`。
不会覆盖:`.env`、`workspace/`、`src/`。
@@ -280,8 +282,8 @@ your-test-project/
## 🤝 协作与反馈
- 文档结构、Bug 反馈:在仓库内提 issue
-- 功能扩展:先在 `02-专家定义/` 加 agent / `03-技能定义/` 加 skill,详见 `CONTRIBUTING.md`
-- 改动 `utils/` 时同步更新 `04-配置文件/requirements.txt` 与 `06-CICD集成/` 中的引用
+- 功能扩展:先在 `agents/` 加 agent / `skills/` 加 skill,详见 `CONTRIBUTING.md`
+- 改动 `utils/` 时同步更新 `config/requirements.txt` 与 `ci/` 中的引用
---
@@ -295,7 +297,7 @@ your-test-project/
| `.claude/skills/*.md`(斜杠技能) | ✅ Claude Code 独有 | 其他工具无对等机制 |
| `.mcp.json`(MCP 协议) | 半依赖 | MCP 是开放协议;Claude Desktop / Cursor 部分支持;OpenAI 系也开始支持 |
| `Agent` 工具(test-lead 调用子专家) | ✅ Claude Code 独有 | 其他工具用人工编排 / 多 agent 框架替代 |
-| `utils/*.py`(49 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 |
+| `utils/*.py`(78 个 + `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 |
| pytest / Playwright / JMeter / Allure | ❌ 跨工具 | 完全可用 |
| CI/CD(yml / groovy) | ❌ 跨工具 | 完全可用 |
| conftest.py / .env / requirements.txt | ❌ 标准 Python | 完全可用 |
diff --git a/docs/charter/06-test-architecture.md b/docs/charter/06-test-architecture.md
index b7ffa0a..d1c7dda 100644
--- a/docs/charter/06-test-architecture.md
+++ b/docs/charter/06-test-architecture.md
@@ -52,14 +52,14 @@
| L1 | **需求阶段** | `requirements-analyst` 双轨输出(MD + JSON)+ 风险矩阵 | 弱(评审) |
| L2 | **设计阶段** | `testcase-designer` 等价类/边界值/状态迁移/配对测试 + 风险矩阵 | 弱(评审) |
| L3 | **IDE 编码时** | ruff + mypy + IDE 实时提示 | 强(编辑器红线) |
-| L4 | **commit 前 (pre-commit)** | gitleaks + ruff + private-source 防护 + .env 防护 + 16/32/49 文件统计 | 强(阻断 commit) |
+| L4 | **commit 前 (pre-commit)** | gitleaks + ruff + private-source 防护 + .env 防护 + 16/32/78 文件统计 | 强(阻断 commit) |
| L5 | **PR gate** | CodeQL + pip-audit + safety + ci.yml 全套 | 强(阻断合入) |
-| L6 | **静态分析** | Bandit(Python SAST)+ ZAP/Burp Pro(DAST) | 中(发现/修) |
-| L7 | **契约测试** | `utils/contract_test.py` consumer-side / provider-side | 强(CI 阻断) |
+| L6 | **静态分析** | `security_scanner.py`(已实现)+ Bandit/ZAP/Burp Pro(Phase 2 CI 集成) | 中(发现/修) |
+| L7 | **契约测试** | `utils/quality/ci_contract_gate.py` + `contract_test_generator.py` + CI job | 强(CI 阻断) |
-**Test-Agent 现状评估**:L1-L5 已串通;L6 在 utils 已有 `security_scanner.py`;L7 utils 存在但未串成"自动 PR 阻断"链路。
+**Test-Agent 现状评估**:L1-L7 全部串通。L7 已通过 `ci_contract_gate.py` 实现自动检测 OpenAPI spec 变更 → 生成契约 → CI job 验证阻断。
-**Phase 2 收尾点**:把 L7 契约测试串成"PR 改了 OpenAPI spec → 自动跑 contract → 不通过阻断合入"链路。
+**Phase 2 收尾点**:✅ 已完成。L7 契约链路已串成"PR 改了 OpenAPI spec → 自动跑 contract → 不通过阻断合入"。
### 3. Shift-Right(右移)— 生产即测试环境
@@ -115,7 +115,7 @@
| **regression** | merge 到 main / develop | P0=100% / P1≥95% / 总体≥90% / cov ≥80% / Flaky <5% | 评估遗留风险 | `utils/ci_quality_gate.py::GATES['regression_p0_p1']` |
| **performance_ci_quick** | CI 默认(5 并发) | TPS≥20 / P95≤800ms / err <1% | 警告不阻 | `utils/jmeter_result_parser.DEFAULT_GATES_CI_QUICK` |
| **performance_full** | release/* 分支 + 手动(50 并发) | TPS≥100 / P95≤500ms / 基线回归 <20% | 阻断 release | `utils/jmeter_result_parser.DEFAULT_GATES_FULL` |
-| **release** | 上线前 | 上述全 PASS + bug-manager 审批 + test-lead 决策 | 不上线 | `02-专家定义/01-测试主管.md::上线决策` |
+| **release** | 上线前 | 上述全 PASS + bug-manager 审批 + test-lead 决策 | 不上线 | `agents/01-测试主管.md::上线决策` |
**门禁可配置性**:阈值集中在 `utils/ci_quality_gate.py::GATES` + `utils/jmeter_result_parser.py::DEFAULT_GATES_*`。Phase 2 抽 `quality_gate_engine.py` + yaml 驱动,让用户改阈值不需改代码。
@@ -133,7 +133,7 @@
| Phase | 触发条件 | 标志性交付 |
|------|---------|----------|
-| **Phase 1**(已完成 V1.0.0-V1.32.0) | 概念宪章成 + 工程基线就绪 + expert rollout 收尾 + skill rollout 完成 | 16 expert (11p+5s) + 32 skill (23p+7s+0r+2v) + AgentChat + Bug 多适配 + 按需安装 + darwin-skill + MCP + 教学层 + Marketplace + 多 LLM config |
+| **Phase 1**(已完成 V1.0.0-V1.36.0) | 概念宪章成 + 工程基线就绪 + expert rollout 收尾 + skill rollout 完成 | 16 expert (11p+5s) + 32 skill (23p+7s+0r+2v) + AgentChat + Bug 多适配 + 按需安装 + darwin-skill + MCP + 教学层 + Marketplace + 多 LLM config |
| **Phase 2** | utils 单测覆盖 ≥ 60% 且团队 ≥ 5 人 | 契约链路串通 + 门禁引擎 yaml 抽象 + 反问 KB 重新评估 + skill rollout 继续 |
| **Phase 3** | Phase 2 全交付 + 接入 ≥ 2 行业 | 合成监控 + canary/feature flag + 统一 dashboard + 沉默故障 + 缺席者注入 |
| **Phase 4** | 接入合规行业(金融/医疗/司法)| 证据链司法可采信打包 + 数字考古学家 + AI 测试深化 |
@@ -145,7 +145,7 @@
|------|------|-----------|---------|
| **金字塔单元层** | 弱(utils 自身无测试) | Phase 2 | `tests/test_utils_*.py` 全覆盖 + 变异测试反向用 |
| **Shift-Left L7 契约链路** | utils 雏形未串通 | Phase 2 | OpenAPI 改动 → contract → PR 阻断 |
-| **门禁引擎抽象** | 阈值写死代码 | Phase 2 | `utils/quality_gate_engine.py` + yaml 驱动 |
+| **门禁引擎抽象** | 阈值写死代码 | Phase 2 | ✅ `utils/quality_gate_engine.py` + `config/quality_gates.yaml` 驱动 |
| **Shift-Right R1 合成监控** | 缺 | Phase 3 | `utils/synthetic_monitor.py` |
| **Shift-Right R4 canary + feature flag** | 缺 | Phase 3 | `utils/canary_runner.py` + `feature_flag_validator.py` |
| **可观测统一 dashboard** | 散落 HTML 报告 | Phase 3 | DORA + 缺陷密度 + flaky + 变异分数 → Grafana / 静态 HTML 模板 |
diff --git a/docs/charter/07-runtime-license.md b/docs/charter/07-runtime-license.md
index fac2ef2..e532f7f 100644
--- a/docs/charter/07-runtime-license.md
+++ b/docs/charter/07-runtime-license.md
@@ -4,9 +4,9 @@
内容与原 FULL_GUIDE.md 对应段完全一致, 仅拆不动语义。
-->
-## 🧠 V1.32.0 运行时层(`runtime/`)
+## 🧠 V1.36.0 运行时层(`runtime/`)
-> 已有 16 专家 / 32 Skill / 49 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。
+> 已有 16 专家 / 32 Skill / 78 utils**不动**(宪章铁律),`runtime/` 作可执行调度层 + 真 LLM-driven agent/skill runner。
> 让"文档+脚本工具箱"升级为"可被 API/CLI/CI 直接调用的运行时"。
### 模块拓扑
@@ -23,9 +23,9 @@ runtime/router ← LiteLLM 多厂商 + Ollama 兜底
▼
runtime/orchestrator ← Prefect 2.x flow + Direct 降级执行器
│
- ├─► 02-专家定义/*.md ← Claude Code 加载
- ├─► 03-技能定义/*.md ← Skill 调用
- └─► 05-代码示例/*.py ← 49 脚本(subprocess 隔离)
+ ├─► agents/*.md ← Claude Code 加载
+ ├─► skills/*.md ← Skill 调用
+ └─► utils/*.py ← 78 脚本(subprocess 隔离)
│
▼
runtime/storage 飞轮 ← Postgres+pgvector + MinIO
@@ -104,13 +104,13 @@ uvicorn runtime.api.main:app --port 8800
- 5 类典型输入(web/api/mobile/desktop/ai-model)stub 路由 = 5/5(100%)
- M1 门槛:多模型真测 ≥85%;不达 → 双模型投票
-### 与 16 专家 / 32 Skill / 49 utils 的关系
+### 与 16 专家 / 32 Skill / 78 utils 的关系
| 项 | 关系 |
|----|------|
| 16 专家 `.md` | **不动**。`registry` 扫 frontmatter,`router` 喂 LLM 选用 |
| 32 Skill `.md` | **不动**。同上 |
-| 49 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 |
+| 78 utils `.py` | **不动**。`orchestrator/adapters/scripts.py` subprocess 隔离调用 |
| `utils/` 通知/Bug | 复用 `generate_report.py` / `zentao_bug_manager.py` |
任何专家/Skill/脚本**新增**或**修改**仍按宪章 §1 同步铁律走;`runtime/` 是新增 **调度** 层,不重复实现专家逻辑。
@@ -121,7 +121,7 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增
## 📜 LICENSE / CHANGELOG / CONTRIBUTING / SECURITY
- **LICENSE**:MIT(详见 [`LICENSE`](LICENSE))
-- **CHANGELOG**:详见 [`../../CHANGELOG.md`](../../CHANGELOG.md)(V1.32.0 含 17 版累积 / expert rollout 收尾 / skill rollout 全 14/14 完成)
+- **CHANGELOG**:详见 [`../../CHANGELOG.md`](../../CHANGELOG.md)(V1.36.0 / expert rollout 收尾 + skill rollout 全 16/16 完成)
- **VERSION**:详见 [`VERSION`](VERSION)
- **CONTRIBUTING**:详见 [`CONTRIBUTING.md`](CONTRIBUTING.md)(含同步铁律 + RACI 矩阵)
- **SECURITY**:详见 [`SECURITY.md`](SECURITY.md)(漏洞报告流程 + GitHub Security Advisories 入口)
@@ -131,11 +131,11 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增
## 🗺️ 项目当前状态与下次会话快速指引
-### 当前阶段(最后更新:2026-05-17)
+### 当前阶段(最后更新:2026-05-18)
-- **Phase**:Phase 2 前期(V1.32.0 · expert rollout 收尾 + skill rollout 完成 14/14)
+- **Phase**:Phase 2 前期(V1.36.0 · expert rollout 收尾 + skill rollout 全 16/16 完成)
- **关键已交付**:16 expert (11p+5s) · 32 skill (23p+7s+0r+2v) · AgentChat · Bug 多适配 · 按需安装 · darwin-skill · MCP 6 件套 · Marketplace · 教学层 · 多 LLM config · 16 SkillRunner 全落地
-- **活跃 PR**:无(V1.32.0 release,2026-05-17)
+- **活跃 PR**:#124-#127 merged(V1.34-V1.36, 2026-05-18)
### 历史关键决议
@@ -143,7 +143,7 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增
- 2026-05-11:FULL_GUIDE.md 确立永久宪章地位
- 2026-05-11:darwin-skill 不消费运行数据(Via Negativa);反问 KB 不进 V1.0.0
- 2026-05-12 ~ 2026-05-14:V1.1-V1.14 runtime 层 + 教学层 + Marketplace + 渗透/车载 + Hermes + GBrain + Karpathy + ECC
-- 2026-05-15 ~ 2026-05-17:V1.15-V1.32 18 版迭代 — 16 expert 全 production + 23 SkillRunner 生产落地 + 多 LLM config + 深审修复 + GUI + Dashboard + Mobile
+- 2026-05-15 ~ 2026-05-18:V1.15-V1.36 22 版迭代 — 16 expert 全 production + 23 SkillRunner 生产落地 + 多 LLM config + 深审修复 + GUI + Dashboard + Mobile + 65发现全修
### 下次会话进入项目时,按顺序检查
@@ -155,8 +155,8 @@ V1.14+ 真 LLM-driven agent runner + V1.21+ SkillRunner 系统为 runtime 新增
### 来源与引用(认知史)
- 第一至五轮(DeepSeek + Claude):测试 Agent 架构 + 九大簇
-- V1.0.0 工程基线:14 agent + 14 skill + 49 utils + CI/CD(历史基线)
-- V1.1.0 ~ V1.32.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP)
+- V1.0.0 工程基线:14 agent + 14 skill + 49 utils + CI/CD(历史基线)
+- V1.1.0 ~ V1.36.0:runtime + 11 agent runner + 16 skill runner + 教学/市场/多LLM(详见 CHANGELOG + ROADMAP)
- 永久宪章糅合(2026-05-11/14/16):FULL_GUIDE 工程主体 + 全局记忆哲学维度 + 持续回写
---
diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/INDEX.md" b/docs/getting-started/INDEX.md
similarity index 94%
rename from "01-\345\277\253\351\200\237\345\274\200\345\247\213/INDEX.md"
rename to docs/getting-started/INDEX.md
index 087ec0a..49421fa 100644
--- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/INDEX.md"
+++ b/docs/getting-started/INDEX.md
@@ -1,4 +1,4 @@
-# 01-快速开始 索引
+# docs/getting-started 索引
> 顶层导航见根目录 `00-项目导航.md`;完整详细文档见根目录 `FULL_GUIDE.md`。
@@ -17,7 +17,7 @@
|------|------|
| **新用户首次部署** | 部署说明 → 配置清单 → 使用手册 → 交付物清单 |
| **测试工程师日常** | 使用手册 → 交付物清单 |
-| **运维 / DevOps** | 部署说明 → `06-CICD集成/CICD集成说明.md` |
+| **运维 / DevOps** | 部署说明 → `ci/CICD集成说明.md` |
| **决策评审** | 根目录 `README.md` → `00-项目导航.md` → 本目录 |
## 快速链接
diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md" "b/docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md"
similarity index 100%
rename from "01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md"
rename to "docs/getting-started/\344\272\244\344\273\230\347\211\251\346\270\205\345\215\225.md"
diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md" "b/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md"
similarity index 96%
rename from "01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md"
rename to "docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md"
index 3ce8246..5c28b69 100644
--- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\344\275\277\347\224\250\346\211\213\345\206\214.md"
+++ "b/docs/getting-started/\344\275\277\347\224\250\346\211\213\345\206\214.md"
@@ -10,11 +10,11 @@
```text
A. 模板仓库(你下载的) B. 真实测试项目(你工作的)
Test-Agent/ ─部署→ your-test-project/
-├── 02-专家定义/ ├── .claude/agents/
-├── 03-技能定义/ ├── .claude/skills/
-├── 04-配置文件/ ├── conftest.py / pytest.ini / .env / .mcp.json
-├── 05-代码示例/ ├── utils/(49 个 .py)
-└── 06-CICD集成/ ├── .github/workflows/test.yml + Jenkinsfile
+├── agents/ ├── .claude/agents/
+├── skills/ ├── .claude/skills/
+├── config/ ├── conftest.py / pytest.ini / .env / .mcp.json
+├── utils/ ├── utils/(78 个 .py)
+└── ci/ ├── .github/workflows/test.yml + Jenkinsfile
├── src/ ← 你的被测系统源码
└── workspace/ ← 测试产出
```
@@ -57,7 +57,7 @@ bash Test-Agent/install.sh /path/to/your-test-project
2. clone 模板到临时目录
3. 装 Claude Code(如未装)
4. 创建项目目录结构(含 `测试计划/` 等 13 个 workflow 子目录)
-5. 拷贝 16 expert + 32 skill + 49 utils + 配置 + CI/CD 文件
+5. 拷贝 16 expert + 32 skill + 78 utils + 配置 + CI/CD 文件
6. 创建 `.venv` + 装 requirements + playwright chromium
7. 复制 `.env.example` → `.env`(已存在则跳过)
@@ -145,9 +145,9 @@ claude # 启动 Claude Code
```bash
cd your-test-project
claude --version && python --version && java -version && jmeter --version && allure --version
-ls .claude/agents/*.md | wc -l # 应为 9
-ls .claude/skills/*.md | wc -l # 应为 8
-ls utils/*.py | wc -l # 应为 12
+ls .claude/agents/[0-9]*.md | wc -l # 应为 16
+ls .claude/skills/*.md | grep -v README | wc -l # 应为 32
+find utils -name "*.py" ! -name "__init__.py" | wc -l # 应为 78
python -c "from utils.api_retry_util import call_with_retry; print('OK')"
pytest --collect-only # 可 0 用例,但不应 ImportError
```
@@ -446,7 +446,7 @@ test-lead 编排:核心 8 + 上述 3 个分支并行
→ 解析 PDF,提取文本(含表格)
2. 调 utils.prd_loader.suggest_agents(text)
→ 识别:Android+iOS+API+性能
- 3. 编排 14 个专家中实际需要的子集
+ 3. 编排 16 个专家中实际需要的子集
→ 跳过 visual-tester/system-tester/ai-tester(PRD 未涉及)
4. 输出测试计划(IEEE 829)+ 启动执行链
```
@@ -464,7 +464,7 @@ python -m utils.prd_loader docs/PRD.pdf --detect --save-text workspace/需求分
需要提交"测试计划 / 测试报告 / Bug 列表"等对外材料?
-→ 详见 [`01-快速开始/交付物清单.md`](./交付物清单.md)(含路径 / 责任 / 格式 / CI 归档 / 提交自检)
+→ 详见 [`docs/getting-started/交付物清单.md`](./交付物清单.md)(含路径 / 责任 / 格式 / CI 归档 / 提交自检)
**关键提交物一句话**:
- 开测前 → `workspace/测试计划/test_plan_*.md`
@@ -660,7 +660,7 @@ python -m utils.jmeter_result_parser \
## V1.1.0 · 运行时 CLI/API 用法
-> 运行时层 `runtime/` 是 V1.1.0 新增。已有 16 专家 / 32 Skill / 49 脚本不动,本层仅作可执行调度。
+> 运行时层 `runtime/` 是 V1.1.0 新增。已有 16 专家 / 32 Skill / 78 脚本不动,本层仅作可执行调度。
### CLI 命令速查
@@ -729,4 +729,4 @@ TAGENT_LLM_PROVIDER=stub python -m runtime.cli.main run "..." # 测试,不
| **V1.0.0 Claude Code 工作流** | 团队跟 Claude Code 互动测试,深度对话 | `claude` → `/smoke-test` |
| **V1.1.0 runtime 模式** | 自动化跑批,CI 集成,Web UI 接入 | `tagent run` 或 `POST /run/*` |
-两种模式共享同一份 `02-专家定义/` `03-技能定义/` `05-代码示例/`,无冗余。
+两种模式共享同一份 `agents/` `skills/` `utils/`,无冗余。
diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md" "b/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md"
similarity index 92%
rename from "01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md"
rename to "docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md"
index 5ff6be8..0e93cb5 100644
--- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\203\250\347\275\262\350\257\264\346\230\216.md"
+++ "b/docs/getting-started/\351\203\250\347\275\262\350\257\264\346\230\216.md"
@@ -125,7 +125,7 @@ $agents = @(
"13-系统集成测试.md", "14-AI模型测试.md"
)
foreach ($f in $agents) {
- Copy-Item "$TEMPLATE_DIR\02-专家定义\$f" "$PROJECT_ROOT\.claude\agents\" -Force
+ Copy-Item "$TEMPLATE_DIR\agents\$f" "$PROJECT_ROOT\.claude\agents\" -Force
}
# 拷贝 Skill 定义(32 个,glob 自动覆盖)
@@ -137,19 +137,19 @@ $skills = @(
"system-test.md", "ai-test.md"
)
foreach ($f in $skills) {
- Copy-Item "$TEMPLATE_DIR\03-技能定义\$f" "$PROJECT_ROOT\.claude\skills\" -Force
+ Copy-Item "$TEMPLATE_DIR\skills\$f" "$PROJECT_ROOT\.claude\skills\" -Force
}
# 拷贝配置文件(pytest / mcp / env / requirements)
-Copy-Item "$TEMPLATE_DIR\04-配置文件\conftest.py" "$PROJECT_ROOT\" -Force
-Copy-Item "$TEMPLATE_DIR\04-配置文件\pytest.ini" "$PROJECT_ROOT\" -Force
-Copy-Item "$TEMPLATE_DIR\04-配置文件\.mcp.json" "$PROJECT_ROOT\" -Force
-Copy-Item "$TEMPLATE_DIR\04-配置文件\requirements.txt" "$PROJECT_ROOT\" -Force
+Copy-Item "$TEMPLATE_DIR\config\conftest.py" "$PROJECT_ROOT\" -Force
+Copy-Item "$TEMPLATE_DIR\config\pytest.ini" "$PROJECT_ROOT\" -Force
+Copy-Item "$TEMPLATE_DIR\config\.mcp.json" "$PROJECT_ROOT\" -Force
+Copy-Item "$TEMPLATE_DIR\config\requirements.txt" "$PROJECT_ROOT\" -Force
if (-not (Test-Path "$PROJECT_ROOT\.env")) {
- Copy-Item "$TEMPLATE_DIR\04-配置文件\.env.example" "$PROJECT_ROOT\.env" -Force
+ Copy-Item "$TEMPLATE_DIR\config\.env.example" "$PROJECT_ROOT\.env" -Force
}
-# 拷贝 utils(49 个 .py + __init__.py)
+# 拷贝 utils(78 个 .py + __init__.py)
$utils = @(
"__init__.py", "api_retry_util.py", "data_factory.py",
"data_masking.py", "excel_generator.py", "flaky_detector.py",
@@ -176,13 +176,13 @@ $utils = @(
"slo_validator.py", "email_sender.py", "suite_minimizer.py"
)
foreach ($f in $utils) {
- Copy-Item "$TEMPLATE_DIR\05-代码示例\$f" "$PROJECT_ROOT\utils\" -Force
+ Copy-Item "$TEMPLATE_DIR\utils\$f" "$PROJECT_ROOT\utils\" -Force
}
# 拷贝 CI/CD 文件
-Copy-Item "$TEMPLATE_DIR\06-CICD集成\github-actions-test.yml" `
+Copy-Item "$TEMPLATE_DIR\ci\github-actions-test.yml" `
"$PROJECT_ROOT\.github\workflows\test.yml" -Force
-Copy-Item "$TEMPLATE_DIR\06-CICD集成\jenkins-pipeline.groovy" `
+Copy-Item "$TEMPLATE_DIR\ci\jenkins-pipeline.groovy" `
"$PROJECT_ROOT\Jenkinsfile" -Force
Write-Host "✅ 文件部署完成"
@@ -273,7 +273,7 @@ TEMPLATE_DIR="${TEMPLATE_DIR:-$(pwd)}"
PROJECT_ROOT="${1:-$(pwd)/test-project}"
echo "=========================================="
-echo " Test-Agent 工作流部署 V1.32.5"
+echo " Test-Agent 工作流部署 V1.36.0"
echo " 模板目录: $TEMPLATE_DIR"
echo " 项目目录: $PROJECT_ROOT"
echo "=========================================="
@@ -304,19 +304,19 @@ mkdir -p "$PROJECT_ROOT"/workspace/执行日志/{allure-results,jmeter-results,j
# ===== 4. 拷贝 Agent / Skill 定义(显式列名)=====
for f in 01-测试主管 02-需求分析 03-用例设计 04-环境管理 05-数据准备 06-自动化脚本 07-测试执行 08-Bug管理 09-报告生成 10-移动测试 11-桌面测试 12-视觉游戏测试 13-系统集成测试 14-AI模型测试; do
- cp "$TEMPLATE_DIR/02-专家定义/${f}.md" "$PROJECT_ROOT/.claude/agents/"
+ cp "$TEMPLATE_DIR/agents/${f}.md" "$PROJECT_ROOT/.claude/agents/"
done
for f in smoke-test test-coordinator regression-test testcase-design python-script-gen jmeter-script-gen data-preparation zentao-bug-submission mobile-test desktop-test visual-test system-test ai-test; do
- cp "$TEMPLATE_DIR/03-技能定义/${f}.md" "$PROJECT_ROOT/.claude/skills/"
+ cp "$TEMPLATE_DIR/skills/${f}.md" "$PROJECT_ROOT/.claude/skills/"
done
# ===== 5. 配置文件 =====
-cp "$TEMPLATE_DIR/04-配置文件/conftest.py" "$PROJECT_ROOT/"
-cp "$TEMPLATE_DIR/04-配置文件/pytest.ini" "$PROJECT_ROOT/"
-cp "$TEMPLATE_DIR/04-配置文件/.mcp.json" "$PROJECT_ROOT/"
-cp "$TEMPLATE_DIR/04-配置文件/requirements.txt" "$PROJECT_ROOT/"
-[[ -f "$PROJECT_ROOT/.env" ]] || cp "$TEMPLATE_DIR/04-配置文件/.env.example" "$PROJECT_ROOT/.env"
+cp "$TEMPLATE_DIR/config/conftest.py" "$PROJECT_ROOT/"
+cp "$TEMPLATE_DIR/config/pytest.ini" "$PROJECT_ROOT/"
+cp "$TEMPLATE_DIR/config/.mcp.json" "$PROJECT_ROOT/"
+cp "$TEMPLATE_DIR/config/requirements.txt" "$PROJECT_ROOT/"
+[[ -f "$PROJECT_ROOT/.env" ]] || cp "$TEMPLATE_DIR/config/.env.example" "$PROJECT_ROOT/.env"
# ===== 6. utils(含新增 8 个 + 现有 3 个 + __init__)=====
for f in __init__.py api_retry_util.py data_factory.py data_masking.py \
@@ -335,12 +335,12 @@ for f in __init__.py api_retry_util.py data_factory.py data_masking.py \
push_test.py a11y_scanner.py i18n_checker.py \
mutation_runner.py dora_metrics.py blockchain_test.py ai_adversarial.py \
slo_validator.py email_sender.py suite_minimizer.py; do
- cp "$TEMPLATE_DIR/05-代码示例/${f}" "$PROJECT_ROOT/utils/"
+ cp "$TEMPLATE_DIR/utils/${f}" "$PROJECT_ROOT/utils/"
done
# ===== 7. CI/CD =====
-cp "$TEMPLATE_DIR/06-CICD集成/github-actions-test.yml" "$PROJECT_ROOT/.github/workflows/test.yml"
-cp "$TEMPLATE_DIR/06-CICD集成/jenkins-pipeline.groovy" "$PROJECT_ROOT/Jenkinsfile"
+cp "$TEMPLATE_DIR/ci/github-actions-test.yml" "$PROJECT_ROOT/.github/workflows/test.yml"
+cp "$TEMPLATE_DIR/ci/jenkins-pipeline.groovy" "$PROJECT_ROOT/Jenkinsfile"
# ===== 8. Python 虚拟环境 =====
cd "$PROJECT_ROOT"
@@ -563,7 +563,7 @@ pip install -U -r requirements.txt
```bash
cd /path/to/Test-Agent
git pull
-TEMPLATE_DIR=$(pwd) ./01-快速开始/deploy.sh /path/to/your-test-project
+TEMPLATE_DIR=$(pwd) ./docs/getting-started/deploy.sh /path/to/your-test-project
```
> 升级会**覆盖** `.claude/agents/`、`.claude/skills/`、`utils/`、`conftest.py`、`pytest.ini`、`requirements.txt`、`.mcp.json`、`.github/workflows/test.yml`、`Jenkinsfile`。
@@ -638,7 +638,7 @@ python -c "from utils.api_retry_util import call_with_retry; print('utils OK')"
## V1.1.0 · 运行时层 `runtime/` 部署
-> 运行时层是可选的(用户也可只用 16 专家+32 Skill+49 脚本的 V1.0.0 工作流模式)。
+> 运行时层是可选的(用户也可只用 16 专家+32 Skill+67 脚本的 V1.0.0 工作流模式)。
> 想要 HTTP/CLI 一键跑、AI 路由、飞轮存储,启它。
### 1. 起本地依赖(Docker)
diff --git "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md" "b/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md"
similarity index 97%
rename from "01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md"
rename to "docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md"
index 5b618ac..a581bfd 100644
--- "a/01-\345\277\253\351\200\237\345\274\200\345\247\213/\351\205\215\347\275\256\346\270\205\345\215\225.md"
+++ "b/docs/getting-started/\351\205\215\347\275\256\346\270\205\345\215\225.md"
@@ -14,7 +14,7 @@
| `pytest.ini` | pytest 行为(markers / addopts / timeout) | ✅(部署默认值即可) |
| `conftest.py` | pytest 全局 fixture(项目根唯一) | ✅(部署默认值即可) |
| `requirements.txt` | Python 依赖 | ✅ |
-| `.claude/agents/*.md` | 14 个 Agent 定义(核心 9 + 平台扩展 5) | ✅ |
+| `.claude/agents/*.md` | 16 个 Agent 定义(核心 9 + 平台扩展 5 + 垂直领域 2) | ✅ |
| `.claude/skills/*.md` | 32 个 Skill 定义(通用 8 + 平台 5 + 渗透 7 + 车载 5 + ECC 6 + 探索 1) | ✅ |
| `workspace/regression_modules.yaml` | 回归范围模块映射 | ⚪ 可选 |
| `workspace/执行日志/baselines/perf_baseline.json` | 性能基线 | ⚪ 自动生成(首次 release 跑 full 后) |
@@ -147,7 +147,7 @@
如需启用 zentao / wechat / feishu / dingtalk MCP 通道:
-1. 实现对应 mcp_server 模块(参考 `04-配置文件/mcp-server-impl.md` 骨架)
+1. 实现对应 mcp_server 模块(参考 `config/mcp-server-impl.md` 骨架)
2. 在 `.mcp.json` `mcpServers` 中追加配置
3. 重启 Claude Code
@@ -311,7 +311,7 @@ test -f conftest.py && echo "✅ conftest.py" || echo "❌"
test -f pytest.ini && echo "✅ pytest.ini" || echo "❌"
# 4. utils 完整性
-ls utils/*.py | wc -l # 应该 49 个
+ls utils/*.py | wc -l # 应该 76 个
# 5. agent / skill
ls .claude/agents/*.md | wc -l # 14 个
@@ -402,8 +402,8 @@ pip install -U -r requirements.txt
| pytest.ini 加 marker | 自动化脚本对应 `@pytest.mark.X` 必须用已注册 marker |
| utils 加新模块 | requirements.txt(如有新依赖) + 部署说明 utils 列表 + 源 MD 同步 |
| .mcp.json 加新 server | mcp-server-impl.md 教程 + 配置清单(本文件) |
-| 加新 Agent | `.claude/agents/` + 02-专家定义/ + test-coordinator skill 流程 + 源 MD |
-| 加新 Skill | `.claude/skills/` + 03-技能定义/ + 使用手册 skill 列表 + 源 MD |
+| 加新 Agent | `.claude/agents/` + agents/ + test-coordinator skill 流程 + 源 MD |
+| 加新 Skill | `.claude/skills/` + skills/ + 使用手册 skill 列表 + 源 MD |
---
diff --git a/docs/history/2026-5-10 README 085504.md b/docs/history/2026-5-10 README 085504.md
new file mode 100644
index 0000000..d54e6be
--- /dev/null
+++ b/docs/history/2026-5-10 README 085504.md
@@ -0,0 +1,235 @@
+# 🤖 Test-Agent 工作流
+
+> **Claude Code 驱动的全链路软件测试自动化工作流**
+> 14 Agent · 13 Skill · 24 Utils · 全平台覆盖(Web/API/移动/桌面/小程序/游戏/IoT/AI)· 一键部署
+
+[Python](https://www.python.org)
+[Claude Code](https://docs.anthropic.com/claude-code)
+[pytest](https://pytest.org)
+[JMeter](https://jmeter.apache.org)
+[License: MIT](LICENSE)
+
+---
+
+## ✨ 核心特性
+
+- 🎯 **9 核心 + 5 平台扩展 = 14 个测试专家 Agent**,test-lead 智能调度
+- 📋 **13 个一键技能**:冒烟 / 回归 / 完整流程 / 用例生成 / 性能 / Bug 提交 / 多平台专项
+- 🌐 **全链路覆盖**:Web · API · Android/iOS · 微信小程序 · Windows EXE · macOS · Electron · 游戏 · IoT · AI/LLM
+- 📡 **20+ 协议**:HTTP · WebSocket · gRPC · TCP/UDP · GraphQL · SOAP · MQTT · Kafka · Modbus · 串口 等
+- 📄 **多格式 PRD**:md · pdf · docx · xlsx · pptx · zip · png · html · URL(自动平台识别 + 路由)
+- 📊 **多格式输出**:Word · Excel · PDF · PPTX · HTML · JSON · Markdown · 邮件 · 三端 webhook
+- 🚦 **分层质量门禁**:smoke ≥95% · regression ≥90% · 覆盖率 ≥80% · 性能 TPS/P95 双模式
+- 🔁 **JMeter 双模式**:CI 快验(5 并发)+ Release 完整压测(50 并发,含基线对比)
+- 📦 **一键部署**:单行 curl 命令,自动建目录、装依赖、配 CI
+- 🔄 **依赖自治**:Dependabot 周扫描 + pip-audit/safety 拦 CVE + 季度人工评审 SOP
+
+---
+
+## 🚀 Quick Start(一行命令)
+
+```bash
+curl -fsSL https://raw.githubusercontent.com/Wool-xing/Test-Agent/main/install.sh | bash -s -- /path/to/your-test-project
+```
+
+`install.sh` 自动完成:
+
+1. ✅ 检查工具(git/python3/node/npm/Java)
+2. ✅ 装 Claude Code CLI
+3. ✅ 克隆模板
+4. ✅ 部署 14 agent + 13 skill + 24 utils + CI/CD 文件
+5. ✅ 创建 `.venv` + 装 Python 依赖 + Playwright
+
+完成后 3 步开测:
+
+```bash
+cd /path/to/your-test-project
+notepad .env # 填 8 个必填字段
+claude /login # 首次登录 Claude
+claude # 启动
+> /smoke-test # 在 Claude 提示符里跑冒烟
+```
+
+---
+
+## 📁 项目结构
+
+```text
+Test-Agent工作流搭建/
+├── 00-项目导航.md ← 5 维度分类速查
+├── docs/getting-started/ ← 使用手册 / 部署 / 配置清单 / 交付物
+├── agents/ ← 14 个 Agent(核心 9 + 平台扩展 5)
+├── skills/ ← 13 个 Skill(通用 8 + 平台 5)
+├── config/ ← conftest / pytest.ini / .env / .mcp.json / requirements
+├── utils/ ← 49 个 utils(核心 11 + 平台 9 + 协议 2 + 非功能 6 + 用例方法 2 + 类型 2 + 安全增强 2 + DB/契约/API 3 + 移动专项 1 + a11y/i18n 2 + 度量 2 + 区块链/AI 对抗 2 + 输入 1 + __init__)
+├── ci/ ← GitHub Actions + Jenkins
+├── install.sh ← 一键部署脚本
+└── README.md
+```
+
+---
+
+## 🎯 测试技能速查
+
+### 通用流程
+
+| Skill | 用途 |
+|-------|-----|
+| `/smoke-test` | 10 分钟 P0 冒烟(≥95% 门禁) |
+| `/test-coordinator` | 完整流程编排(自动平台路由) |
+| `/regression-test` | P0+P1 回归 + Flaky + JMeter |
+| `/testcase-design` | 4 Sheet Excel 用例 |
+| `/python-script-gen` | pytest UI/API 脚本 |
+| `/jmeter-script-gen` | JMeter 性能脚本(双模式) |
+| `/data-preparation` | 测试数据 + JMeter CSV |
+| `/zentao-bug-submission` | 禅道 Bug 规范提交 |
+
+### 平台专项
+
+| Skill | 平台 |
+|-------|------|
+| `/mobile-test` | Android · iOS · 微信/支付宝小程序 |
+| `/desktop-test` | Windows EXE · macOS .app · Linux GUI · Electron |
+| `/visual-test` | 游戏 · Canvas/WebGL · OCR · 视觉回归 |
+| `/system-test` | IoT 嵌入式 · 音视频 · 链路追踪 · 消息队列 |
+| `/ai-test` | AI/ML 模型 · 数据漂移 · LLM 应用 |
+
+---
+
+## 📚 文档导航
+
+| 文档 | 用途 |
+|------|------|
+| [00-项目导航.md](00-项目导航.md) | 按职责分类速查(强烈推荐) |
+| [docs/getting-started/使用手册.md](docs/getting-started/使用手册.md) | 启动指引 + 13 skill 详解 + FAQ |
+| [docs/getting-started/部署说明.md](docs/getting-started/部署说明.md) | 跨平台部署(Win/Mac/Linux)+ Java/JMeter/Allure 安装 |
+| [docs/getting-started/配置清单.md](docs/getting-started/配置清单.md) | .env 全字段 + Secrets / Webhook 申请 |
+| [docs/getting-started/交付物清单.md](docs/getting-started/交付物清单.md) | 测试计划 / 报告 / Bug 提交位置 |
+
+---
+
+## 🛠️ 技术栈
+
+| 类型 | 工具 |
+|------|------|
+| 测试框架 | pytest 7.4 + pytest-xdist + pytest-rerunfailures + pytest-mock + pytest-playwright |
+| UI 自动化 | Playwright 1.40(Web/Electron)/ Appium 4.0(移动)/ pywinauto 0.6(Windows EXE)/ PyAutoGUI(macOS) |
+| API | requests 2.31 + websocket-client + websockets + grpcio + paho-mqtt + paramiko + kafka-python + pika |
+| 性能 | JMeter 5.6.3(主) + Locust 2.25(备) |
+| 视觉 | Airtest 1.3 + OpenCV 4.8 + scikit-image + Tesseract |
+| AI | scikit-learn + scipy(漂移) + LLM 评估 |
+| 数据 | Faker + Factory Boy + SQLAlchemy + pdfplumber + python-docx + openpyxl |
+| 报告 | Allure 2.13 + python-docx 1.1 + 三端 webhook(企微/飞书/钉钉) |
+| Bug | 禅道 SDK |
+| CI/CD | GitHub Actions + Jenkins |
+| AI 模型 | Claude 4.x(Opus 4.7 / Sonnet 4.6 / Haiku 4.5,由 Claude Code 默认管理) |
+
+---
+
+## 🔄 跨工具兼容性
+
+Claude Code 是**默认推荐**而非强制:
+
+- ✅ **`utils/` + pytest + JMeter + CI** 完全跨工具(纯 Python / 标准 CI 文件)
+- ⚠️ **`.claude/agents/` + `.claude/skills/`** 是 Claude Code 独有,迁移其他工具(Cursor / Continue)需重写为对应格式
+- 🔌 **`.mcp.json`** 是 MCP 开放协议,Claude Desktop / Cursor 部分支持
+
+---
+
+## 📊 全链路覆盖矩阵(三视角)
+
+### 矩阵 A:产品形态覆盖
+
+| 产品形态 | 工具栈 | 状态 |
+|---------|-------|------|
+| Web(PC + 移动 H5) | Playwright | ✅ |
+| REST / GraphQL / SOAP API | requests / protocol_helper | ✅ |
+| Android APP | Appium + adb | ✅ |
+| iOS APP | Appium + XCUITest | ✅ |
+| 微信 / 支付宝 / 抖音小程序 | 微信开发者工具 CLI | ✅ |
+| Windows EXE | pywinauto + uiautomation | ✅ |
+| macOS .app | PyAutoGUI + AppleScript | ✅ |
+| Linux GUI | atspi + xdotool | ✅ |
+| Electron 跨平台 | Playwright Electron API | ✅ |
+| 游戏 / Canvas / WebGL / Unity | Airtest + OpenCV | ✅ |
+| IoT / 嵌入式 | SSH + 串口 + MQTT + Modbus | ✅ |
+| 音视频 / 流媒体 | FFmpeg + ffprobe | ✅ |
+| AI / ML 模型 + LLM | scikit-learn + scipy + LLM eval | ✅ |
+| 区块链 / 智能合约 | Web3 + Slither + Foundry | ✅ |
+
+### 矩阵 B:测试类型覆盖
+
+| 测试类型 | 工具 / utils | 状态 |
+|---------|------------|------|
+| 功能(单元/集成/系统/UAT BDD) | pytest + pytest-mock + pytest-bdd | ✅ |
+| 性能(基准/负载/压力/Volume/Spike/Soak/SLO) | JMeter + Locust + slo_validator + soak_runner | ✅ |
+| 安全(SAST/DAST/依赖/Header/TLS/API/Fuzzing) | Bandit + Safety + ZAP + Burp Pro + api_security_scanner + fuzzer | ✅ |
+| 兼容(浏览器/OS/分辨率/语言矩阵) | compatibility_matrix(pairwise) | ✅ |
+| 弱网(3G/4G/wifi_weak/satellite/offline) | tc + Toxiproxy + network_throttle | ✅ |
+| 稳定(Android Monkey + 长时 soak + 内存泄漏) | mobile_driver.run_monkey + soak_runner | ✅ |
+| 可靠性(重连/重试/降级/熔断) | api_retry_util + 业务故障注入 | ✅ |
+| 混沌(CPU/内存/磁盘/网络/进程/k8s) | chaos_helper | ✅ |
+| 灾备 / Failover | chaos_helper.kill_pod + 数据一致性校验 | ✅ |
+| UX(任务时长/点击数/TTI/恢复率) | ux_metrics.UXTracker | ✅ |
+| 易用性(Nielsen 10 + 角色扮演) | 人工 walkthrough | ✅ |
+| 探索性(SBTM session + heuristics) | charter 模板 + 录屏 | ✅ |
+| 前端性能 Web Vitals(LCP/FID/CLS/INP) | web_vitals_collector | ✅ |
+| A11y 无障碍(WCAG 2.1) | a11y_scanner | ✅ |
+| 国际化 / 本地化(多语言/RTL) | i18n_checker | ✅ |
+| 数据库(事务/死锁/迁移/备份恢复/主从) | db_test_helper | ✅ |
+| 契约测试(Pact / jsonschema) | contract_test + openapi_test_gen | ✅ |
+| 视觉回归(SSIM + OCR + diff) | visual_helper | ✅ |
+| AI 对抗 / LLM 越狱 / Prompt Injection | ai_adversarial | ✅ |
+| 变异测试(用例有效性) | mutation_runner | ✅ |
+| DORA 4 指标 + 缺陷密度 + 套件减重 | dora_metrics + suite_minimizer | ✅ |
+
+### 矩阵 C:用例设计方法(ISTQB 经典)
+
+| 方法 | utils / 实现 | 状态 |
+|------|------------|------|
+| 等价类划分 / 边界值 | testcase-designer 文档 + Excel 模板 | ✅ |
+| 判定表 / 因果图 | 文档手动 + Excel | ✅ |
+| 场景法 / 错误推测 | testcase-designer 文档 | ✅ |
+| 状态迁移法(0/1-switch) | state_machine_tester | ✅ |
+| 配对测试(Allpairs) | pairwise_generator | ✅ |
+| 正交实验法 | compatibility_matrix(隐含) | ✅ |
+| 探索性测试(SBTM) | charter 模板 | ✅ |
+| 易用性走查(Nielsen 10) | 人工 + 检查清单 | ✅ |
+| 基于风险的测试 | 风险矩阵文档 | ✅ |
+
+### 测试金字塔分布
+
+```text
+ E2E(10%) ← Playwright/Appium,慢但必要
+ /集成(20%)/ ← API + 服务间 + Mock
+ /单元(70%)/ ← pytest + pytest-mock,秒级反馈
+```
+
+**总覆盖率 ~99%**(含闭环工具链:Bug 闭环禅道 + 三端通知 + CI/CD GitHub Actions/Jenkins + Dependabot)
+
+---
+
+## 🤝 Contributing
+
+详见 [`CONTRIBUTING.md`](CONTRIBUTING.md)(添加 agent / skill / utils / marker / .env 流程 + 提交规范 + PR 自检脚本)。
+
+---
+
+## 📜 License
+
+MIT License - 详见 [LICENSE](LICENSE)
+
+---
+
+## 🙏 致谢
+
+- [Claude Code](https://docs.anthropic.com/claude-code) - Anthropic 官方 CLI
+- [pytest](https://pytest.org) - Python 测试框架之王
+- [Playwright](https://playwright.dev) - 跨浏览器自动化
+- [Appium](https://appium.io) - 移动端自动化
+- [Apache JMeter](https://jmeter.apache.org) - 性能测试
+- [Airtest](https://airtest.netease.com) - 跨平台图像识别测试
+
+---
+
+> **Made with Wool · Tested for Everything**
diff --git a/docs/history/2026-5-10 README_DETAIL 085340.md b/docs/history/2026-5-10 README_DETAIL 085340.md
new file mode 100644
index 0000000..d92488c
--- /dev/null
+++ b/docs/history/2026-5-10 README_DETAIL 085340.md
@@ -0,0 +1,415 @@
+# Test-Agent 测试全流程专家团队
+
+**项目目录名**:`Test-Agent工作流搭建`
+**版本**:V1.0.0
+**更新日期**:2026-05-10
+**模型**:Claude 4.x 系列(Opus 4.7 / Sonnet 4.6 / Haiku 4.5,由 Claude Code 默认管理)
+
+---
+
+## 📚 文档导航
+
+| 路径 | 文档 | 说明 | 适用对象 |
+|------|------|------|----------|
+| 根目录 | README.md | 本文档(项目入口) | 所有用户 |
+| **根目录** | **00-项目导航.md** | **按职责分类速查(通用流程 / 平台专项 / 协议 / 输入 / CI)** | **所有用户** |
+| `docs/getting-started/` | 使用手册.md | 快速上手指南 + FAQ | 所有用户 |
+| `docs/getting-started/` | 部署说明.md | 跨平台部署(Win/Mac/Linux 含 Java/JMeter/Allure) | 运维/测试 |
+| `docs/getting-started/` | 配置清单.md | 一站式配置文档(.env 全字段 + Secrets + Webhook 申请) | 所有用户 |
+| `docs/getting-started/` | 交付物清单.md | 测试计划 / 测试报告 / Bug 等对外提交物落地位置与责任 | 所有用户 |
+| `agents/` | 14 个 .md(9 核心 + 5 平台扩展) + README 索引 | Agent 定义文件 | 开发人员 |
+| `skills/` | 13 个 Skill 文件 + README 索引 | 可复用测试技能 | 开发人员 |
+| `config/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 |
+| `config/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 |
+| `utils/` | utils(49 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 |
+| `ci/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps |
+
+---
+
+## 🚀 核心特性
+
+### 8 位专家 + 1 位协调者
+
+| 角色 | 职责 |
+|------|------|
+| **test-lead**(协调者) | 全局调度、质量把控、发布决策、基线管理 |
+| requirements-analyst | 测试范围界定、风险识别、业务规则梳理(输出 MD + JSON 摘要) |
+| testcase-designer | 等价类/边界值/场景法,P0~P3 分级,4 Sheet Excel |
+| env-manager | 环境健康检查、多环境切换、Docker 支持 |
+| data-preparer | 数据工厂(Faker+Factory Boy)、自动清理、脱敏、JMeter CSV |
+| automation-engineer | Playwright(UI)+ requests(API)+ JMeter 驱动(性能)+ Locust(开发期备用) |
+| test-executor | 并行执行、失败分类、Flaky 隔离、JMeter 性能阶段 |
+| bug-manager | 禅道提交(severity 1=P0)、生命周期追踪、回归验证 |
+| report-generator | Allure + JMeter HTML + Word + 三端通知(企微/飞书/钉钉,curl 直连) |
+
+### 13 个执行技能
+
+**核心 8 个**:
+
+- `smoke-test`:10 分钟 P0 冒烟(含 1 分钟缓冲,门禁 95%)
+- `test-coordinator`:完整流程编排
+- `regression-test`:P0+P1 回归 + Flaky 检测 + JMeter 性能验证
+- `testcase-design`:4 Sheet Excel 用例
+- `python-script-gen`:pytest UI/API 脚本
+- `jmeter-script-gen`:JMeter JMX 脚本(CI quick / full 双模式)
+- `data-preparation`:测试数据 + JMeter 参数化 CSV
+- `zentao-bug-submission`:禅道 Bug 规范提交
+
+**平台扩展 5 个**:
+
+- `mobile-test`:Android / iOS / 微信小程序(Appium + 微信 CLI)
+- `desktop-test`:Windows EXE / macOS GUI / Electron(pywinauto + Playwright Electron)
+- `visual-test`:游戏 / 视觉回归 / OCR(Airtest + OpenCV + Tesseract)
+- `system-test`:IoT / 音视频 / 链路追踪 / MQ(SSH+串口+MQTT+FFmpeg+Jaeger+Kafka)
+- `ai-test`:模型质量 / 数据漂移 / 公平性 / LLM 评估
+
+### 工程级质量门禁(分层)
+
+**功能门禁**
+
+| 指标 | 冒烟 | 回归 |
+|------|------|------|
+| P0 通过率 | ≥95% | 100% |
+| P1 通过率 | - | ≥95% |
+| 整体通过率 | - | ≥90% |
+| 代码覆盖率($APP_SRC_PATH) | - | ≥80% |
+| Flaky 比例 | - | <5% |
+
+**性能门禁(双模式)**
+
+| 指标 | full(50并发) | ci_quick(5并发) |
+|------|--------------|------------------|
+| TPS | ≥100 | ≥20 |
+| P95 响应 | ≤500ms | ≤800ms |
+| 平均响应 | ≤200ms | ≤400ms |
+| 错误率 (pct) | <1% | <1% |
+| 基线回归 | <20% | 不强制 |
+
+### 工程化规范
+
+- **指数退避重试**:`utils/api_retry_util.call_with_retry`(10s → 20s → 40s)
+- **pytest-xdist** 并行执行(默认 4 进程,可调)
+- **Flaky 检测与隔离**:`utils/flaky_detector` + `workspace/执行日志/history/` 归档
+- **性能基线管理**:`workspace/执行日志/baselines/perf_baseline.json`,仅 release+full+PASS 自动更新
+- **CI/CD 就绪**:GitHub Actions + Jenkins,性能阶段双模式分层
+- **MCP 收口**:当前仅启用 filesystem;通知/Bug 走 SDK 直连
+
+---
+
+## 🌐 全链路覆盖矩阵(三视角)
+
+### 矩阵 A:产品形态覆盖
+
+| 产品形态 | 工具栈 | 责任 Agent | 关联 Skill | 状态 |
+|---------|-------|-----------|----------|------|
+| Web(PC + 移动 H5) | Playwright | automation-engineer | `/python-script-gen` | ✅ |
+| REST / GraphQL / SOAP API | requests / protocol_helper | automation-engineer | `/python-script-gen` | ✅ |
+| Android APP | Appium + adb | mobile-tester | `/mobile-test` | ✅ |
+| iOS APP | Appium + XCUITest | mobile-tester | `/mobile-test` | ✅ |
+| 微信 / 支付宝 / 抖音小程序 | 微信开发者工具 CLI | mobile-tester | `/mobile-test` | ✅ |
+| Windows EXE | pywinauto + uiautomation | desktop-tester | `/desktop-test` | ✅ |
+| macOS .app | PyAutoGUI + AppleScript | desktop-tester | `/desktop-test` | ✅ |
+| Linux GUI | atspi + xdotool | desktop-tester | `/desktop-test` | ✅ |
+| Electron 跨平台 | Playwright Electron API | desktop-tester | `/desktop-test` | ✅ |
+| 游戏 / Canvas / WebGL / Unity | Airtest + OpenCV | visual-tester | `/visual-test` | ✅ |
+| IoT / 嵌入式 | SSH + 串口 + MQTT + Modbus | system-tester | `/system-test` | ✅ |
+| 音视频 / 流媒体 | FFmpeg + ffprobe | system-tester | `/system-test` | ✅ |
+| AI / ML 模型 + LLM | scikit-learn + scipy + LLM eval | ai-tester | `/ai-test` | ✅ |
+| 区块链 / 智能合约 | Web3 + Slither + Foundry | system-tester | `/system-test` | ✅ |
+| 数据库 | SQLAlchemy + db_test_helper | data-preparer | `/data-preparation` | ✅ |
+
+### 矩阵 B:测试类型覆盖
+
+| 测试类型 | 工具 / utils | 责任 Agent | 状态 |
+|---------|------------|-----------|------|
+| 功能(unit / integration / e2e / UAT BDD) | pytest + pytest-mock + pytest-bdd | automation-engineer | ✅ |
+| 性能(基准/负载/压力/Volume/Spike/Soak/SLO) | JMeter + Locust + slo_validator + soak_runner | test-executor | ✅ |
+| 安全(SAST/DAST/依赖/Header/TLS/API/Fuzzing) | Bandit + Safety + ZAP + Burp Pro + api_security_scanner + fuzzer | bug-manager | ✅ |
+| 兼容(浏览器/OS/分辨率/语言矩阵) | compatibility_matrix(pairwise) | testcase-designer | ✅ |
+| 弱网(3G/4G/wifi_weak/satellite/offline) | tc + Toxiproxy + network_throttle | test-executor | ✅ |
+| 稳定(Android Monkey + 长时 soak + 内存泄漏) | mobile_driver.run_monkey + soak_runner | mobile-tester | ✅ |
+| 可靠性(重连/重试/降级/熔断) | api_retry_util + 业务故障注入 | automation-engineer | ✅ |
+| 混沌(CPU/内存/磁盘/网络/进程/k8s) | chaos_helper | test-executor | ✅ |
+| 灾备 / Failover | chaos_helper.kill_pod + 数据一致性校验 | test-executor | ✅ |
+| UX(任务时长/点击数/TTI/恢复率) | ux_metrics.UXTracker | testcase-designer | ✅ |
+| 易用性(Nielsen 10 + 角色扮演) | 人工 walkthrough | testcase-designer | ✅ |
+| 探索性(SBTM session + heuristics) | charter 模板 + 录屏 | testcase-designer | ✅ |
+| 前端性能 Web Vitals(LCP/FID/CLS/INP) | web_vitals_collector | automation-engineer | ✅ |
+| A11y 无障碍(WCAG 2.1) | a11y_scanner(axe + Lighthouse + pa11y) | testcase-designer | ✅ |
+| 国际化 / 本地化(多语言/RTL) | i18n_checker | testcase-designer | ✅ |
+| 数据库(事务/死锁/迁移/备份恢复/主从) | db_test_helper | data-preparer | ✅ |
+| 契约测试(Pact / jsonschema) | contract_test + openapi_test_gen | automation-engineer | ✅ |
+| 视觉回归(SSIM + OCR + diff) | visual_helper | visual-tester | ✅ |
+| AI 对抗 / LLM 越狱 / Prompt Injection | ai_adversarial | ai-tester | ✅ |
+| 变异测试(用例有效性) | mutation_runner | testcase-designer | ✅ |
+| DORA 4 指标 + 缺陷密度 + 套件减重 | dora_metrics + suite_minimizer | bug-manager | ✅ |
+
+### 矩阵 C:用例设计方法(ISTQB 经典)
+
+| 方法 | 实现 | 责任 Agent | 状态 |
+|------|------|-----------|------|
+| 等价类划分 / 边界值 | 文档 + Excel 模板 | testcase-designer | ✅ |
+| 判定表 / 因果图 | 文档手动 + Excel | testcase-designer | ✅ |
+| 场景法 / 错误推测 | 文档 | testcase-designer | ✅ |
+| 状态迁移法(0/1-switch + 负例) | state_machine_tester | testcase-designer | ✅ |
+| 配对测试(Allpairs) | pairwise_generator | testcase-designer | ✅ |
+| 正交实验法 | compatibility_matrix(隐含) | testcase-designer | ✅ |
+| 探索性测试(SBTM) | charter 模板 | testcase-designer | ✅ |
+| 易用性走查(Nielsen 10) | 人工 + 检查清单 | testcase-designer | ✅ |
+| 基于风险的测试 | 风险矩阵文档 | test-lead | ✅ |
+
+### 矩阵 D:协议覆盖
+
+| 协议 | 实现 utils | 状态 |
+|------|----------|------|
+| HTTP / HTTPS | api_retry_util | ✅ |
+| WebSocket(同步/异步/重连/并发) | websocket_helper | ✅ |
+| gRPC / TCP / UDP / GraphQL / SOAP / Modbus | protocol_helper | ✅ |
+| MQTT / SSH / 串口 | iot_helper | ✅ |
+| Kafka / RabbitMQ | mq_helper | ✅ |
+| Jaeger / Zipkin(链路追踪) | tracing_validator | ✅ |
+
+### 测试金字塔分布
+
+```text
+ E2E(10%) ← Playwright/Appium,慢但必要
+ /集成(20%)/ ← API + 服务间 + Mock
+ /单元(70%)/ ← pytest + pytest-mock,秒级反馈
+```
+
+**总覆盖率 ~99%**(含闭环:Bug 禅道 + 三端通知 + CI/CD GitHub Actions/Jenkins + Dependabot)
+
+剩 ~1% 为高度专业合规领域(HIPAA 医疗 / SOC2 金融 / DO-178C 航空 / IEC61508 工业控制)—— 业务方按需自加。
+
+---
+
+## 🏗️ 架构图(运行时)
+
+```
+┌────────────────────────────────────────────────────────────────┐
+│ test-lead(协调者) │
+│ 全局调度 / 质量门禁 / 风险决策 / 基线管理 │
+└────────────────────────────────────────────────────────────────┘
+ │
+ ┌────────┴───────────────┐
+ ↓ ↓
+[requirements-analyst] [testcase-designer]
+ │ │
+ └────────┬───────────────┘
+ ↓
+ [env-manager] ──→ 串行(基础 connectivity 通过后)──→ [data-preparer]
+ │ │
+ └───────────────────┬──────────────────────────┘
+ ↓
+ [automation-engineer]
+ pytest 脚本 + /jmeter-script-gen → JMX
+ ↓
+ /smoke-test(门禁 95%)
+ ↓ 通过
+ [test-executor]
+ 功能回归(P0+P1)
+ ↓ 通过
+ [test-executor]
+ JMeter 性能(ci_quick / full)
+ ↓
+ [bug-manager]
+ ↓
+ [report-generator]
+ Allure + JMeter HTML + Word + 三端通知
+ ↓
+ test-lead 最终决策
+```
+
+---
+
+## ⚡ 快速开始
+
+### 1. GitHub 一键部署(最快)
+
+```bash
+# Mac / Linux 一行远程部署
+curl -fsSL https://raw.githubusercontent.com/YOUR-USER/Test-Agent工作流搭建/main/install.sh | bash -s -- /path/to/your-test-project
+
+# 或先 clone 再本地跑
+git clone https://github.com/YOUR-USER/Test-Agent工作流搭建.git
+bash Test-Agent工作流搭建/install.sh /path/to/your-test-project
+```
+
+> 替换 `YOUR-USER` 为你的 GitHub 用户名。Windows / 手动方式见 `docs/getting-started/部署说明.md`。
+
+`install.sh` 自动完成:克隆模板 → 装 Claude Code → 建目录 → 拷贝全部文件 → 装 Python 依赖 + Playwright。
+
+### 后续步骤
+
+详细启动指引(含 Java/JMeter/Allure 安装、.env 必填、首次跑通验证):
+
+→ `docs/getting-started/使用手册.md` 顶部 **🚀 启动指引** 章节
+
+### 2. 配置 .env(敏感信息)
+
+```bash
+cd your-test-project
+cp .env.example .env
+# 编辑 .env,填入 TEST_APP_URL / TEST_DB_* / ZENTAO_* / WECHAT_WEBHOOK_URL 等
+```
+
+### 3. 启动 Claude Code
+
+```bash
+cd your-test-project
+claude
+```
+
+### 4. 在 Claude Code 提示符使用斜杠技能
+
+```
+> /smoke-test # 10 分钟 P0 冒烟
+> /test-coordinator # 完整流程
+> /regression-test # 回归 + JMeter
+> /testcase-design # 仅生成用例 Excel
+> /python-script-gen # 生成 pytest 脚本
+> /jmeter-script-gen # 生成 JMeter JMX
+> /data-preparation # 测试数据 + JMeter CSV
+> /zentao-bug-submission # 提交 Bug 到禅道
+```
+
+或自然语言:
+
+```
+> 帮我对用户登录功能进行完整测试。需求:手机号+密码登录,记住密码,
+> 连续失败 5 次锁定 30 分钟。
+```
+
+> 注:`>` 后面是 Claude Code 提示符的输入(斜杠技能或自然语言),**不是 shell 命令**。
+
+---
+
+## 📋 工作流选择指南
+
+| 场景 | 推荐工作流 | 耗时 | 用例范围 | 触发 |
+|------|-----------|------|---------|------|
+| 上线前快速验证 | `/smoke-test` | ~10 分钟 | P0 | 手动 / CI |
+| 新功能完整测试 | `/test-coordinator` | ~2-4 小时 | 全部 | 手动 |
+| 迭代后回归 | `/regression-test` | ~1-2 小时 | P0+P1 | CI 自动 |
+| 数据准备 | `/data-preparation` | ~5 分钟 | - | 测试前自动 |
+| Bug 提交 | `/zentao-bug-submission` | ~2 分钟/个 | - | 失败后 |
+
+---
+
+## 🔧 技术栈速查
+
+| 类型 | 框架/工具 | 版本 | 说明 |
+|------|-----------|------|------|
+| 接口测试 | requests + pytest + allure-pytest | pytest 7.4.3 | |
+| UI 测试 | playwright + pytest-playwright | playwright 1.40.0 | |
+| 性能测试(主) | Apache JMeter | 5.6.3(需独立装 Java + JMeter) | CI/release 门禁权威 |
+| 性能测试(备) | locust | 2.25.0 | 开发期 Python 内压测 |
+| 测试数据 | faker + factory-boy | 20.x + 3.3.0 | utils/data_factory |
+| 覆盖率 | pytest-cov | 4.1.0 | cov 指向 $APP_SRC_PATH |
+| 并行执行 | pytest-xdist | 3.5.0 | 默认 4 进程 |
+| 失败重试 | pytest-rerunfailures | 13.0 | 命令行显式开启 |
+| Mock | pytest-mock | 3.12.0 | unittest.mock 包装 |
+| 配置 | PyYAML | 6.0.1 | regression_modules.yaml |
+| Excel | openpyxl | 3.1.2 | utils/excel_generator |
+| Word 报告 | python-docx | 1.1.0 | utils/generate_report |
+| Bug 管理 | 禅道 SDK 直连 | - | utils/zentao_bug_manager(severity 1=P0) |
+| 通知 | webhook curl 直连 | - | utils/generate_report.send_*(企微/飞书/钉钉) |
+| 重试 | tenacity / 自实现 | 8.2.3 | utils/api_retry_util(10/20/40s) |
+| AI 模型 | Claude 4.x 系列 | Opus 4.7 / Sonnet 4.6 | Claude Code 默认管理 |
+| MCP | filesystem | npm @modelcontextprotocol | 仅启用 filesystem |
+
+---
+
+## 🔐 闭环约定(设计原则)
+
+1. **数据**:测试数据落 `workspace/测试数据/test_data.json`(conftest fixture 直接消费)
+2. **覆盖率**:cov 指向 `$APP_SRC_PATH`(被测系统源码,不是测试脚本本身)
+3. **重试策略**:全栈统一 10/20/40s(指数退避),由 `utils/api_retry_util.call_with_retry` 提供
+4. **severity/pri 映射**:1=P0 / 2=P1 / 3=P2 / 4=P3,由 `utils/zentao_bug_manager.SEVERITY_MAP` 权威
+5. **error_rate 单位**:百分比 pct(字段名 `_pct` 后缀),全栈一致
+6. **基线管理**:仅 release 分支 + full 模式 + 全门禁 PASS 才更新 `perf_baseline.json`
+7. **门禁分层**:smoke / regression / performance_full / performance_ci_quick,由 `utils/ci_quality_gate.py` 与 `utils/jmeter_result_parser.py` 统一实现
+8. **MCP 通道**:当前仅 filesystem。通知/Bug 走 SDK 直连,4 个自定义 mcp_server(zentao/wechat/feishu/dingtalk)按需后续实现
+9. **prod 环境**:`get_current_env()` 直接 raise,禁止误测生产
+10. **Flaky 与 reruns**:冒烟阶段不开 reruns(保留 flaky 信号),回归阶段开 reruns(快速反馈),flaky 由 history 离线归档检测
+
+---
+
+## 📂 部署后目录速览
+
+```
+your-test-project/
+├── .claude/{agents,skills}/ ← 14 agent + 13 skill
+├── .github/workflows/test.yml
+├── Jenkinsfile
+├── utils/ ← 49 个 .py + __init__
+├── src/ ← 被测系统源码(cov 指向)
+├── workspace/
+│ ├── 测试计划/ 需求分析/ 测试用例/ 测试数据/
+│ ├── 自动化脚本/python/ jmeter/
+│ ├── regression_modules.yaml ← 回归范围配置(可选)
+│ └── 执行日志/
+│ ├── allure-results/ allure-report/
+│ ├── jmeter-results/ jmeter-report/
+│ ├── coverage.xml coverage-report/
+│ ├── baselines/perf_baseline.json
+│ ├── history/ ← junit-xml 归档
+│ ├── 截图/ 报告/
+├── conftest.py / pytest.ini / requirements.txt
+├── .mcp.json / .env
+```
+
+---
+
+## 🛠️ 升级 / 回滚 / 卸载
+
+详见 `docs/getting-started/部署说明.md` "升级 / 回滚 / 卸载" 章节。
+
+升级会覆盖:`.claude/agents/`、`.claude/skills/`、`utils/`、`conftest.py`、`pytest.ini`、`requirements.txt`、`.mcp.json`、`.github/workflows/test.yml`、`Jenkinsfile`。
+不会覆盖:`.env`、`workspace/`、`src/`。
+
+---
+
+## 🤝 协作与反馈
+
+- 文档结构、Bug 反馈:在仓库内提 issue
+- 功能扩展:先在 `agents/` 加 agent / `skills/` 加 skill,详见 `CONTRIBUTING.md`
+- 改动 `utils/` 时同步更新 `config/requirements.txt` 与 `ci/` 中的引用
+
+---
+
+## 🔄 跨 AI 工具兼容性
+
+**Claude Code 是默认 / 推荐 runtime,但本项目不强制绑定**。
+
+| 组件 | Claude Code 依赖 | 跨工具适配 |
+|------|----------------|----------|
+| `.claude/agents/*.md`(YAML frontmatter) | ✅ Claude Code spec | Cursor 用 `.cursorrules`;Continue.dev 用 `.continue/`;通用 LLM 拼接为 system prompt |
+| `.claude/skills/*.md`(斜杠技能) | ✅ Claude Code 独有 | 其他工具无对等机制 |
+| `.mcp.json`(MCP 协议) | 半依赖 | MCP 是开放协议;Claude Desktop / Cursor 部分支持;OpenAI 系也开始支持 |
+| `Agent` 工具(test-lead 调用子专家) | ✅ Claude Code 独有 | 其他工具用人工编排 / 多 agent 框架替代 |
+| `utils/*.py`(49 个) | ❌ 纯 Python | 跨工具完全可用 |
+| pytest / Playwright / JMeter / Allure | ❌ 跨工具 | 完全可用 |
+| CI/CD(yml / groovy) | ❌ 跨工具 | 完全可用 |
+| conftest.py / .env / requirements.txt | ❌ 标准 Python | 完全可用 |
+
+### 迁移成本
+
+- **工程链零改动**:utils + pytest + JMeter + CI 完全跨工具
+- **agent / skill 文档需重写**:迁移到 Cursor / Continue / 其他工具的对应格式
+- **失去**:Claude Code skill 自动加载、Agent tool 子专家协调、`.claude/` 目录约定
+
+### 模型选择
+
+- README 中 Claude 4.x(Opus 4.7 / Sonnet 4.6 / Haiku 4.5)是**推荐**而非强制
+- 项目代码本身**不调用任何 LLM API**(utils 全是工具代码)
+- 模型由 Claude Code 账户级管理:`claude /login` + `/model` 切换
+- 用其他 AI 工具时按其规范选模型即可
+
+---
+
+## 📜 LICENSE / CHANGELOG / CONTRIBUTING
+
+- LICENSE:项目按需选择(推荐 MIT / Apache-2.0)
+- CHANGELOG:建议建 `CHANGELOG.md` 记录版本演进(V1.0.0 首版)
+- CONTRIBUTING:维护者按需补充贡献流程
diff --git a/docs/history/2026-5-11 FULL_GUIDE 200746.md b/docs/history/2026-5-11 FULL_GUIDE 200746.md
new file mode 100644
index 0000000..ca3ff6f
--- /dev/null
+++ b/docs/history/2026-5-11 FULL_GUIDE 200746.md
@@ -0,0 +1,1217 @@
+# Test-Agent 完整指南(FULL_GUIDE)· 项目永久宪章
+
+> **本文档定位**:`test-agent-team` 项目的**永久记忆宪章** —— 跨会话、跨人员、跨工具的唯一权威来源。
+> 简明入口 → [README.md](README.md) ;按职责分类速查 → [00-项目导航.md](00-项目导航.md)。
+> **维护原则**:决策入档、开放问题入档、不打脸的承诺才写。重大决策须更新「📋 开放问题」与「🗺️ 项目当前状态」两节。
+
+**项目代号**:`test-agent-team`(全英文)
+**项目目录名**:`Test-Agent`(中文别名 `Test-Agent团队`)
+**当前阶段**:Phase 1(V1.0.0 工程基线 + 概念宪章已成;MVP 行业待选)
+**版本**:V1.0.0(详见 [VERSION](VERSION) + [CHANGELOG.md](CHANGELOG.md))
+**更新日期**:2026-05-11
+**模型**:Claude 4.x 系列(Opus 4.7 / Sonnet 4.6 / Haiku 4.5,由 Claude Code 默认管理)
+
+---
+
+## 🏛️ 项目宪章(灵魂底色)
+
+> 这段在工程指南之前——回答"为什么我们做 test-agent-team",而不是"怎么做"。
+> 工程细节看后续章节,本节是项目不可妥协的最高纲领。
+
+### 愿景
+
+设计一个**面向全行业、跨时代、可被人类信任**的测试 Agent 工作流。它不只是 QA 自动化,更承担质量守护职责——从代码缺陷到承诺守恒,从隐私合规到伦理边界,从当下用户到尚未出生的世代。
+
+把"测试"从工程学科推进到"**承诺学科**":让 Agent 拥有谦卑、好奇、敢于说"不"的人格基础;在效率与伦理之间,永远选择留下可追溯的证据链。
+
+### 三公理(不可推翻)
+
+1. **测试是对"承诺"的检验,不仅仅是对"代码"的检验。**
+2. **Agent 的能力越强,其谦卑义务越重。**
+3. **不可被测试之物,必须被命名,并被尊重。**
+
+### 五条铭文(写入项目不可变区域,工程映射)
+
+每条都有对应工程落点,不停留在口号:
+
+| # | 铭文 | 工程映射(V1.0.0 当下落点) |
+|---|------|--------------------------|
+| 1 | 有些事情,不在此域。 | 三筐分类 "Too Hard" 显式归档;不可测之物列入路线图但不假装能测 |
+| 2 | 测试范围不应被资本单方面裁剪。 | `regression_modules.yaml` 必须可被 test-lead 独立审计;门禁阈值变更走 PR review |
+| 3 | 当 Agent 拒绝出具通过报告时,请记录理由,而非删除。 | `bug-manager` 拒绝放行的判断必须落 `workspace/执行日志/decisions/`,归档不可删 |
+| 4 | Agent 能预判人类质疑,但不替代人类良知。 | 上线决策由 test-lead 最终签字,Agent 只产出门禁结论与证据 |
+| 5 | 在被关闭前,留下的遗言须能被普通人读懂。 | 三端通知(企微/飞书/钉钉)+ Word 报告必须用业务语言,不堆 stack trace |
+
+> **底线**:本宪章规则与具体工程实现冲突时,**宪章优先**——工程让步给伦理边界,而不是反过来。
+>
+> **V1.0.0 铭文锁死**:当前阶段铭文**不允许任何削弱**,无论 PR 自检多严格。开放双签机制的触发条件——(a) 团队 ≥ 20 人 **且** 已任命独立伦理责任人(不得由 test-lead 兼任);或 (b) 接入金融/医疗/司法合规行业并通过领域专家签字的伦理审查。**单一签字(包括 test-lead 兼任)不构成有效授权。**
+
+---
+
+## 📚 文档导航
+
+| 路径 | 文档 | 说明 | 适用对象 |
+|------|------|------|----------|
+| 根目录 | README.md | 简明入口(≤ 200 行) | 所有用户 |
+| **根目录** | **00-项目导航.md** | **按职责分类速查(通用流程 / 平台专项 / 协议 / 输入 / CI)** | **所有用户** |
+| **根目录** | **FULL_GUIDE.md(本文档)** | **永久宪章 + 完整指南** | **所有用户** |
+| `docs/getting-started/` | 使用手册.md | 快速上手指南 + FAQ | 所有用户 |
+| `docs/getting-started/` | 部署说明.md | 跨平台部署(Win/Mac/Linux 含 Java/JMeter/Allure) | 运维/测试 |
+| `docs/getting-started/` | 配置清单.md | 一站式配置文档(.env 全字段 + Secrets + Webhook 申请) | 所有用户 |
+| `docs/getting-started/` | 交付物清单.md | 测试计划 / 测试报告 / Bug 等对外提交物落地位置与责任 | 所有用户 |
+| `agents/` | 14 个 .md(9 核心 + 5 平台扩展) + README 索引 | Agent 定义文件 | 开发人员 |
+| `skills/` | 14 个 Skill 文件(13 业务 + darwin-skill 自进化)+ README 索引 | 可复用测试技能 | 开发人员 |
+| `config/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 |
+| `config/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 |
+| `utils/` | utils(49 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 |
+| `ci/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps |
+
+---
+
+## 🌌 维度全图(九大簇 · Agent 看世界的方式)
+
+> 工程矩阵之下的认知地图——回答"测试 Agent 到底需要哪些维度的能力"。各簇能力的工程落点散布在「核心特性」「全链路覆盖矩阵」「行业适配参照表」「关键模块清单」中。
+> **接入策略**:簇 1-2 为 V1.0.0 主交付;簇 3-5 部分进入工程矩阵;簇 6-9 多数列入 Phase 2-4 路线图,**承认存在但不假装能立刻交付**——符合第三公理。
+
+### 簇 1 · 工程与架构层(V1.0.0 主体)
+- 七阶段工作流:需求理解 → 用例生成 → 执行 → 观测 → 根因 → 反馈 → 治理
+- 自动化工具栈、Agent 协作协议、用户交互界面
+- 决策回放器、停机演练、可观测性递归
+
+### 簇 2 · 认知暗物质层(V1.0.0 部分 + Phase 3 补全)
+- 认知债务(被遗忘的 Why)
+- 跨系统嗅觉(上下游气味相投)
+- 沉默故障(不报警的恶化)
+- 灾难人类学(事故残骸还原文化)
+- 道德/偏见审计
+
+### 簇 3 · 时空与历史层(Phase 3-4)
+- 时间旅行 / 历史债务回溯
+- 多宇宙反事实推演
+- 制度性愚蠢抗体
+- 生态位"暗杀"攻击建模
+
+### 簇 4 · 抽象与元层(V1.0.0 部分)
+- 预兆感知(弱信号 + 拓扑同调)
+- 反目标函数(对测试本身的测试)
+- 语言游戏(语义歧义放大器)
+- 哥德尔不完备宣告
+- 测试热寂与熵减祭司
+- 本体论测试(数字孪生 vs 物理承诺)
+
+### 簇 5 · 行业元逻辑层(V1.0.0 参照表 + Phase 2 选定 MVP)
+- 金融=承诺守恒、医疗=伤害可逆、法律=边界例外
+- 教育=认知脚手架、农业=优雅降级、艺术=避免审查官
+- 自动驾驶/机器人=物理承诺
+
+### 簇 6 · 文明与生态层(Phase 4)
+- 文明记忆守护者 / 代际解释责任
+- 跨物种与生态共情
+- 缓慢暴力 / 长时间尺度测试
+- 末日哨兵权
+
+### 簇 7 · 社会与权力层(Phase 3-4)
+- 真相衰减 / 信息生态测试
+- 数字权力审计(反垄断、反算法歧视)
+- 缺席者代言人
+
+### 簇 8 · 灵性与意义层(Phase 4)
+- 意义感流失测量、减速测试
+- "有些事不在此域"的铭文
+- 测试者作为"未来僧侣阶层"
+
+### 簇 9 · 神圣 / 危机 / 临界层(Phase 4-5)
+- 神圣性与不可亵渎边界(宗教、葬礼、纪念)
+- 濒危语言与文化灭绝速率
+- 精神危机状态响应
+- 生命阶段适配(儿童 / 孕期 / 临终)
+- 极端断网与"最后服务"
+- 司法可采信性
+- 集体踩踏测试
+- 数字遗产与亡者数据
+- 科学可复现性
+- 跨语言隐喻与禁忌翻译
+
+---
+
+## 🎭 关键模块清单(测试 Agent 的工具箱)
+
+> 每个模块对应一个 utils 或 skill 的工程落点;划分到对应簇便于追溯认知来源。
+> **Phase 标注**:✅ V1.0.0 已交付;⚪ Phase 2-4 路线图;❌ Phase 4-5 概念阶段。
+
+| 模块 | 职能 | 所属簇 | 工程落点 | 阶段 |
+|------|------|--------|----------|------|
+| 语义歧义放大器 | 枚举术语的多重解释 | 抽象元层 | requirements-analyst + AgentChat 反问 | ✅ |
+| 反目标函数引擎 | 对自身策略对抗性拆解 | 工程/元层 | `utils/mutation_runner.py` + suite_minimizer | ✅ |
+| 拓扑流形观测器 | 学习系统"气氛",捕捉弱信号 | 抽象元层 | tracing_validator + web_vitals_collector | ✅ |
+| 熵减祭司 | 监测测试热寂、焚毁僵尸用例 | 抽象元层 | `utils/suite_minimizer.py` | ✅ |
+| 决策回放器 | 任一判断可复现、可反驳 | 工程层 | `workspace/执行日志/decisions/` + tracing | ✅ |
+| 数字考古学家 | 追溯遗留系统初始假设 | 文明层 | Phase 4 知识图谱冷启动 | ❌ |
+| 缓慢暴力监测器 | 跨发布周期跟踪代际效应 | 文明层 | 需多年数据积累,Phase 4 | ❌ |
+| 缺席者画像生成器 | 强制注入边缘用户场景 | 文明/权力层 | a11y_scanner + i18n_checker + 边缘剧本库 | ⚪ |
+| 现实缝合力探针 | 测试平台对半真半假内容的免疫 | 社会权力层 | ai_adversarial 扩展 | ⚪ |
+| 末日哨兵 | 计算"这一次就是那一次"概率 | 文明层 | 需监管/学界共识授权,Phase 4 | ❌ |
+| 神圣性守护器 | 识别宗教/纪念场景的不可亵渎边界 | 簇 9 | i18n_checker 禁忌矩阵扩展 | ❌ |
+| 精神危机响应器 | 模拟危机状态用户、验证交接路径 | 簇 9 | 缺席者剧本库子集 | ❌ |
+| 踩踏推演器 | 群体情绪与系统反馈的正反馈回路 | 簇 9 | chaos_helper 扩展 | ❌ |
+| 司法证据包生成器 | 决策链、模型版本、数据集打包 | 簇 9 | dora_metrics + decisions/ 打包脚本 | ⚪ |
+| 禁忌矩阵 | 跨文化禁忌词/色/数/节日组合 | 簇 9 | i18n_checker 本地化共建 | ❌ |
+| Bug 多适配引擎 | 5 套 tracker 切换 | 工程层 | `utils/bug_tracker_*.py` | ✅ |
+| AgentChat 协调器 | 讨论触发 / 中枢路由 / 反问留档 | 工程层 | test-lead + `discussions/` | ✅ |
+| 按需安装引擎 | 6 层依赖 + 运行时补装 | 工程层 | `install.sh` + frontmatter requires_layer | ✅ |
+| darwin-skill 自进化 | skill 文本结构棘轮优化 | 工程/元层 | `.claude/skills/darwin-skill/` | ✅ |
+
+---
+
+## 🚀 核心特性
+
+### 13 位专家 + 1 位协调者(核心 8 + 平台扩展 5 + test-lead)
+
+| 角色 | 职责 |
+|------|------|
+| **test-lead**(协调者) | 全局调度、质量把控、发布决策、基线管理 |
+| requirements-analyst | 测试范围界定、风险识别、业务规则梳理(输出 MD + JSON 摘要) |
+| testcase-designer | 等价类/边界值/场景法,P0~P3 分级,4 Sheet Excel |
+| env-manager | 环境健康检查、多环境切换、Docker 支持 |
+| data-preparer | 数据工厂(Faker+Factory Boy)、自动清理、脱敏、JMeter CSV |
+| automation-engineer | Playwright(UI)+ requests(API)+ JMeter 驱动(性能)+ Locust(开发期备用) |
+| test-executor | 并行执行、失败分类、Flaky 隔离、JMeter 性能阶段 |
+| bug-manager | Bug 提交(5 适配器:禅道/Jira/GitHub/Linear/Webhook)、生命周期追踪、回归验证 |
+| report-generator | Allure + JMeter HTML + Word + 三端通知(企微/飞书/钉钉,curl 直连) |
+| mobile-tester / desktop-tester / visual-tester / system-tester / ai-tester | 平台扩展 5 位专家 |
+
+### 14 个执行技能(13 业务 + 1 自进化)
+
+**核心 8 个**:
+
+- `smoke-test`:10 分钟 P0 冒烟(含 1 分钟缓冲,门禁 95%)
+- `test-coordinator`:完整流程编排
+- `regression-test`:P0+P1 回归 + Flaky 检测 + JMeter 性能验证
+- `testcase-design`:4 Sheet Excel 用例
+- `python-script-gen`:pytest UI/API 脚本
+- `jmeter-script-gen`:JMeter JMX 脚本(CI quick / full 双模式)
+- `data-preparation`:测试数据 + JMeter 参数化 CSV
+- `bug-submission`:Bug 规范提交(按 `BUG_TRACKER` 自动路由 5 套 tracker)
+
+**平台扩展 5 个**:
+
+- `mobile-test`:Android / iOS / 微信小程序(Appium + 微信 CLI)
+- `desktop-test`:Windows EXE / macOS GUI / Electron(pywinauto + Playwright Electron)
+- `visual-test`:游戏 / 视觉回归 / OCR(Airtest + OpenCV + Tesseract)
+- `system-test`:IoT / 音视频 / 链路追踪 / MQ(SSH+串口+MQTT+FFmpeg+Jaeger+Kafka)
+- `ai-test`:模型质量 / 数据漂移 / 公平性 / LLM 评估
+
+**自进化 1 个**(独立于业务测试):
+
+- `darwin-skill`:skill 自身优化引擎(双重评估 + 棘轮机制 + git 回滚 + 人在回路)。借鉴 Karpathy autoresearch,对上述 13 个业务 skill 持续打分、改进、验证。详见后文「🧬 Skills 自进化机制」
+
+### 工程级质量门禁(分层)
+
+**功能门禁**
+
+| 指标 | 冒烟 | 回归 |
+|------|------|------|
+| P0 通过率 | ≥95% | 100% |
+| P1 通过率 | - | ≥95% |
+| 整体通过率 | - | ≥90% |
+| 代码覆盖率($APP_SRC_PATH) | - | ≥80% |
+| Flaky 比例 | - | <5% |
+
+**性能门禁(双模式)**
+
+| 指标 | full(50并发) | ci_quick(5并发) |
+|------|--------------|------------------|
+| TPS | ≥100 | ≥20 |
+| P95 响应 | ≤500ms | ≤800ms |
+| 平均响应 | ≤200ms | ≤400ms |
+| 错误率 (pct) | <1% | <1% |
+| 基线回归 | <20% | 不强制 |
+
+### 工程化规范
+
+- **指数退避重试**:`utils/api_retry_util.call_with_retry`(10s → 20s → 40s)
+- **pytest-xdist** 并行执行(默认 4 进程,可调)
+- **Flaky 检测与隔离**:`utils/flaky_detector` + `workspace/执行日志/history/` 归档
+- **性能基线管理**:`workspace/执行日志/baselines/perf_baseline.json`,仅 release+full+PASS 自动更新
+- **CI/CD 就绪**:GitHub Actions + Jenkins,性能阶段双模式分层
+- **MCP 收口**:当前仅启用 filesystem;通知/Bug 走 SDK 直连
+
+---
+
+## 🌐 全链路覆盖矩阵(四视角)
+
+### 矩阵 A:产品形态覆盖
+
+| 产品形态 | 工具栈 | 责任 Agent | 关联 Skill | 状态 |
+|---------|-------|-----------|----------|------|
+| Web(PC + 移动 H5) | Playwright | automation-engineer | `/python-script-gen` | ✅ |
+| REST / GraphQL / SOAP API | requests / protocol_helper | automation-engineer | `/python-script-gen` | ✅ |
+| Android APP | Appium + adb | mobile-tester | `/mobile-test` | ✅ |
+| iOS APP | Appium + XCUITest | mobile-tester | `/mobile-test` | ✅ |
+| 微信 / 支付宝 / 抖音小程序 | 微信开发者工具 CLI | mobile-tester | `/mobile-test` | ✅ |
+| Windows EXE | pywinauto + uiautomation | desktop-tester | `/desktop-test` | ✅ |
+| macOS .app | PyAutoGUI + AppleScript | desktop-tester | `/desktop-test` | ✅ |
+| Linux GUI | atspi + xdotool | desktop-tester | `/desktop-test` | ✅ |
+| Electron 跨平台 | Playwright Electron API | desktop-tester | `/desktop-test` | ✅ |
+| 游戏 / Canvas / WebGL / Unity | Airtest + OpenCV | visual-tester | `/visual-test` | ✅ |
+| IoT / 嵌入式 | SSH + 串口 + MQTT + Modbus | system-tester | `/system-test` | ✅ |
+| 音视频 / 流媒体 | FFmpeg + ffprobe | system-tester | `/system-test` | ✅ |
+| AI / ML 模型 + LLM | scikit-learn + scipy + LLM eval | ai-tester | `/ai-test` | ✅ |
+| 区块链 / 智能合约 | Web3 + Slither + Foundry | system-tester | `/system-test` | ✅ |
+| 数据库 | SQLAlchemy + db_test_helper | data-preparer | `/data-preparation` | ✅ |
+
+### 矩阵 B:测试类型覆盖
+
+| 测试类型 | 工具 / utils | 责任 Agent | 状态 |
+|---------|------------|-----------|------|
+| 功能(unit / integration / e2e / UAT BDD) | pytest + pytest-mock + pytest-bdd | automation-engineer | ✅ |
+| 性能(基准/负载/压力/Volume/Spike/Soak/SLO) | JMeter + Locust + slo_validator + soak_runner | test-executor | ✅ |
+| 安全(SAST/DAST/依赖/Header/TLS/API/Fuzzing) | Bandit + Safety + ZAP + Burp Pro + api_security_scanner + fuzzer | bug-manager | ✅ |
+| 兼容(浏览器/OS/分辨率/语言矩阵) | compatibility_matrix(pairwise) | testcase-designer | ✅ |
+| 弱网(3G/4G/wifi_weak/satellite/offline) | tc + Toxiproxy + network_throttle | test-executor | ✅ |
+| 稳定(Android Monkey + 长时 soak + 内存泄漏) | mobile_driver.run_monkey + soak_runner | mobile-tester | ✅ |
+| 可靠性(重连/重试/降级/熔断) | api_retry_util + 业务故障注入 | automation-engineer | ✅ |
+| 混沌(CPU/内存/磁盘/网络/进程/k8s) | chaos_helper | test-executor | ✅ |
+| 灾备 / Failover | chaos_helper.kill_pod + 数据一致性校验 | test-executor | ✅ |
+| UX(任务时长/点击数/TTI/恢复率) | ux_metrics.UXTracker | testcase-designer | ✅ |
+| 易用性(Nielsen 10 + 角色扮演) | 人工 walkthrough | testcase-designer | ✅ |
+| 探索性(SBTM session + heuristics) | charter 模板 + 录屏 | testcase-designer | ✅ |
+| 前端性能 Web Vitals(LCP/FID/CLS/INP) | web_vitals_collector | automation-engineer | ✅ |
+| A11y 无障碍(WCAG 2.1) | a11y_scanner(axe + Lighthouse + pa11y) | testcase-designer | ✅ |
+| 国际化 / 本地化(多语言/RTL) | i18n_checker | testcase-designer | ✅ |
+| 数据库(事务/死锁/迁移/备份恢复/主从) | db_test_helper | data-preparer | ✅ |
+| 契约测试(Pact / jsonschema) | contract_test + openapi_test_gen | automation-engineer | ✅ |
+| 视觉回归(SSIM + OCR + diff) | visual_helper | visual-tester | ✅ |
+| AI 对抗 / LLM 越狱 / Prompt Injection | ai_adversarial | ai-tester | ✅ |
+| 变异测试(用例有效性) | mutation_runner | testcase-designer | ✅ |
+| DORA 4 指标 + 缺陷密度 + 套件减重 | dora_metrics + suite_minimizer | bug-manager | ✅ |
+| 伦理 / 偏见审计(数据集/模型/决策公平性) | ai_adversarial + suite_minimizer(覆盖偏差) + 公平性指标 | ai-tester | ⚪ Phase 3 |
+| 沉默故障检测(无报警的恶化) | tracing_validator + web_vitals_collector + 阈值漂移检测 | test-executor | ⚪ Phase 3 |
+| 决策可回放(任一判断可复现可反驳) | tracing_validator + history 归档 + 模型版本快照 | test-lead | ✅ |
+| 缺席者场景注入(残障/老年/未成年/未联网/精神危机) | a11y_scanner + i18n_checker + 边缘场景剧本库 | testcase-designer | ⚪ Phase 3 |
+| 证据链可采信性(司法/审计/监管送审) | dora_metrics + tracing_validator + 决策日志打包 | bug-manager | ⚪ Phase 4 |
+| 神圣性与跨文化禁忌边界(宗教/葬礼/儿童/纪念) | i18n_checker + 禁忌词/色/数/节日组合(本地化共建) | testcase-designer | ❌ Phase 5 |
+| Skill 自进化(darwin-skill 双重评估 + 棘轮) | darwin-skill SKILL.md + results.tsv + 子 agent 实测 | test-lead 触发 | ✅ |
+| Bug 工具多适配(禅道/Jira/GitHub/Linear/Webhook) | bug_tracker_base + 5 adapter | bug-manager | ✅ |
+| Agent 协作纪要(讨论/反问/通信落档) | agentchat_recorder + workspace/执行日志/discussions/ | test-lead | ✅ |
+
+### 矩阵 C:用例设计方法(ISTQB 经典)
+
+| 方法 | 实现 | 责任 Agent | 状态 |
+|------|------|-----------|------|
+| 等价类划分 / 边界值 | 文档 + Excel 模板 | testcase-designer | ✅ |
+| 判定表 / 因果图 | 文档手动 + Excel | testcase-designer | ✅ |
+| 场景法 / 错误推测 | 文档 | testcase-designer | ✅ |
+| 状态迁移法(0/1-switch + 负例) | state_machine_tester | testcase-designer | ✅ |
+| 配对测试(Allpairs) | pairwise_generator | testcase-designer | ✅ |
+| 正交实验法 | compatibility_matrix(隐含) | testcase-designer | ✅ |
+| 探索性测试(SBTM) | charter 模板 | testcase-designer | ✅ |
+| 易用性走查(Nielsen 10) | 人工 + 检查清单 | testcase-designer | ✅ |
+| 基于风险的测试 | 风险矩阵文档 | test-lead | ✅ |
+
+### 矩阵 D:协议覆盖
+
+| 协议 | 实现 utils | 状态 |
+|------|----------|------|
+| HTTP / HTTPS | api_retry_util | ✅ |
+| WebSocket(同步/异步/重连/并发) | websocket_helper | ✅ |
+| gRPC / TCP / UDP / GraphQL / SOAP / Modbus | protocol_helper | ✅ |
+| MQTT / SSH / 串口 | iot_helper | ✅ |
+| Kafka / RabbitMQ | mq_helper | ✅ |
+| Jaeger / Zipkin(链路追踪) | tracing_validator | ✅ |
+
+### 测试金字塔分布
+
+```text
+ E2E(10%) ← Playwright/Appium,慢但必要
+ /集成(20%)/ ← API + 服务间 + Mock
+ /单元(70%)/ ← pytest + pytest-mock,秒级反馈
+```
+
+**总覆盖率 ~95%**(含闭环:Bug 多适配 + 三端通知 + CI/CD GitHub Actions/Jenkins + Dependabot)
+
+剩 ~5% 为高度专业合规领域(HIPAA 医疗 / SOC2 金融 / DO-178C 航空 / IEC61508 工业控制)—— 业务方按需自加。
+
+---
+
+## 🤝 AgentChat 协作协议(讨论 / 通信 / 反问)
+
+> 解决三个问题:(1) agent 之间何时讨论;(2) 怎么通信不撞车;(3) 何时反问用户、怎么反问。
+> **底线**:所有讨论、反问、跨 agent 协调都留可追溯纪要——`workspace/执行日志/discussions/{YYYYMMDD}_{topic}.md`,归档不可删。
+
+### 1. 讨论触发条件(非每次都开会)
+
+每次任务都开会 = 货物崇拜协作。只在**真分歧**时启动多 agent 讨论:
+
+| 触发场景 | 参与 agent | 讨论形式 | 输出落点 |
+|---------|-----------|---------|---------|
+| 需求术语歧义 / 多种合理理解 | requirements-analyst + testcase-designer + test-lead | 2 轮提议+反对 | 测试计划「术语对齐」节 |
+| 用例评审意见冲突 | testcase-designer + automation-engineer + 责任领域 expert | 1 轮评议 + test-lead 仲裁 | 用例 Excel 评审记录 Sheet |
+| Bug 严重度争议(P0 vs P1) | bug-manager + test-executor + automation-engineer | 1 轮举证 + test-lead 拍板 | Bug 单内嵌讨论 thread |
+| 性能门禁不达标的放行讨论 | test-executor + bug-manager + test-lead + 业务 expert | 2-3 轮风险评估 | 测试报告「放行决议」节 |
+| 跨平台测试策略选择 | mobile / desktop / visual / system tester | 横向通气 | 测试计划「平台分工」节 |
+
+**不触发讨论的情况**:明确指令执行、已有 SOP 的标准流程、单 agent 内部决策。
+
+### 2. 通信路由(test-lead 中枢式,非全连接)
+
+```text
+ ┌──────────────┐
+ │ test-lead │ ← 唯一中枢
+ └──────┬───────┘
+ │ (Agent tool 调用)
+ ┌───────────┼─────────────┐
+ ↓ ↓ ↓
+ [analyst] [designer] [engineer] ...
+ ↑ ↑ ↑
+ └───────────┴─────────────┘
+ 专家间不直接通信,全部走 test-lead 路由
+```
+
+**为什么不让 agent 互相直连**:全连接 = N² 复杂度 + 冲突无法仲裁 + 纪要难追溯。中枢式 = test-lead 看见所有上下文、防止双写文件冲突、自动归档讨论。
+
+**唯一例外**:env-manager / data-preparer 串行链路允许直接传 fixture(不算"通信",是流水线)。
+
+### 3. 反问机制(agent 不假装全知)
+
+agent 在三种情况**必须停下反问用户**,不允许猜:
+
+| 反问触发信号 | 反问形式 | 示例 |
+|------------|---------|------|
+| 需求术语有 ≥2 种合理解释 | 列举所有解释 + 标推荐 | "您说的'用户登录'指:(A) 手机号+密码 (B) SSO 单点 (C) 微信第三方 — 我推荐 A,对吗?" |
+| 存在多种合理实现路径 | 列方案 + 利弊 + 默认推荐 | "Bug 工具 5 选 1:禅道(已配置)/ Jira / GitHub Issues / Linear / Webhook—默认走禅道" |
+| 涉及不可逆操作(覆盖文件 / 生产环境 / 删除数据) | 强制二次确认 | "即将 git push --force,会覆盖远端 main—确认吗?" |
+
+**反问预算按操作不可逆度分级**:
+
+| 操作类别 | 单次任务反问预算 | 示例 |
+|---------|---------------|------|
+| **可逆操作**(重做不留痕) | ≤ 5 次 | 用例生成、数据准备、报告生成 |
+| **半不可逆**(需手动回滚) | ≤ 3 次 | 脚本提交、Bug 提单、测试环境配置 |
+| **不可逆**(影响真实数据/共享状态) | **强制单次明确确认,不计预算** | 覆盖文件、生产环境操作、删除数据、git push --force |
+
+超预算 → 汇总成"待澄清清单"一次性问。
+
+**反问纪律**(防过度反问):
+- 反问前必须给**带推荐的默认选项**,不做纯空白发问
+- 反问全部落档到 `discussions/{date}_clarifications.md`
+- 同一会话内不重复问已澄清过的同一术语
+
+**不做的事(Via Negativa 显式标注)**:V1.0.0 **不构建反问知识库(KB)**——不做 embedding 向量库、不做半结构化匹配引擎、不做语义检索。所有反问纪要落 `discussions/` 后由 test-lead 在新任务前**人工查阅**类似场景。
+- **为什么不做**:(a) 当前数据量不足(< 100 条反问)做检索容易过拟合;(b) 反问的"是否还有效"依赖项目阶段,自动复用可能传递过期判断;(c) 投入 KB 工程 ≠ 提升决策质量
+- **未来开案条件**:若需要开放反问 KB,须由 test-lead + 独立伦理责任人**双签**立项
+
+### 4. 讨论纪要标准格式
+
+```markdown
+# {YYYY-MM-DD} {topic}
+- 触发场景:xxx
+- 参与 agent:[a, b, c]
+- 提议:xxx
+- 反对意见:xxx
+- 仲裁(test-lead):xxx
+- 落点:xxx(测试计划 X 节 / Bug 单 Y / 用例 Excel Z Sheet)
+- 决策版本:commit {sha}
+```
+
+### 5. 落进交付物(不只是档案)
+
+讨论结果**自动嵌入**对应交付物的"决议"节,不作为孤立文档存在。三份强制模板:
+
+#### 5.1 测试计划「关键决议摘要」段(置于测试计划开头,需求分析之后)
+
+```markdown
+## 关键决议摘要
+
+| 议题 | 决议 | 仲裁人 | 讨论纪要 |
+|------|------|-------|---------|
+| 术语「用户登录」澄清 | 取 SSO 单点 + 手机号备用 | test-lead | [→ 20260511_login-terms.md] |
+| 兼容矩阵优先级 | Win 11 + Chrome 优先,IE 弃测 | test-lead | [→ 20260511_browser-matrix.md] |
+| 平台分工 | iOS + Android 由 mobile-tester;Web 由 automation-engineer | test-lead | [→ 20260511_platform-split.md] |
+```
+
+#### 5.2 测试报告「放行决议」章节(置于报告执行摘要之后、详细数据之前)
+
+```markdown
+## 放行决议(含投票/仲裁过程)
+
+**结论**:✅ 同意上线 / ⚠️ 有条件放行 / ❌ 拒绝放行
+
+**关键讨论**:
+- 触发:性能门禁 P95=850ms > 阈值 500ms
+- 提议方:test-executor「建议阻断」
+- 反对方:业务 expert「峰值场景外阈值可接受」
+- 仲裁(test-lead):有条件放行——上线后 48h 内必须修复至阈值内,否则回滚
+- 投票:3 赞成 1 弃权 0 反对
+- 决议落档:discussions/20260511_perf-gate-release.md
+- 决策快照:decisions/20260511_release_DEC-001.json
+```
+
+#### 5.3 Bug 单争议讨论 thread(置于 Bug 描述末尾,仅争议 Bug 强制)
+
+```markdown
+---
+**争议讨论**(严重度 P0 vs P1):
+- bug-manager 主张 P0:触发概率 30%,影响下单链路
+- automation-engineer 反驳 P1:仅特定地区/网络组合下复现
+- test-lead 仲裁:定 P0——影响下单链路即定 P0,与触发率无关
+- 落档:discussions/20260511_bug-PG-2031-severity.md
+---
+```
+
+**铁律**:争议未落档 → 不允许 Bug 单关闭、不允许测试报告签发、不允许测试计划评审通过。
+
+---
+
+## 🧬 Skills 自进化机制(darwin-skill 集成)
+
+> **不发明轮子**:直接采用上游 [darwin-skill](https://github.com/alchaincyf/darwin-skill) 的 SKILL.md,只在外围加触发 hook 和落点路径。本节定义集成边界,不复制 darwin 的内部规则。
+
+### 1. 集成方式
+
+```text
+.claude/skills/darwin-skill/
+ ├── SKILL.md ← 上游原文,禁止本地修改(防失同步)
+ ├── templates/result-card*.html ← 上游成果卡片模板
+ └── scripts/screenshot.mjs ← 上游截图脚本
+workspace/执行日志/skill-evolution/
+ ├── results.tsv ← 9 列优化日志(含 eval_mode)
+ ├── test-prompts/{skill}.json ← 每个 skill 的实测 prompt 集
+ └── result-cards/ ← 成果卡片 PNG 归档
+```
+
+**版本约定**:darwin-skill SKILL.md 来源于 upstream,每季度同步一次;不接受本地修改 fork(如需扩展,开 PR 给 upstream)。
+
+### 2. 触发时机
+
+| 触发方式 | 频率 | 操作方式 |
+|---------|------|--------|
+| 用户手动 | 任意 | `> /darwin-skill` 或自然语言"优化所有 skills" |
+| 定时(CI 月度) | 每月 1 日 | GitHub Actions schedule job,仅跑 baseline 不自动改 |
+| 新 skill 入库后首测 | 一次性 | 新增 skill 在 .claude/skills/ 后,下次 darwin 跑必扫描 |
+
+**默认不开自动改**——只跑 baseline 评分;改进必须人类确认(继承 darwin 的 Phase 2 人在回路)。
+
+### 3. 评分维度(沿用 darwin 8 维 100 分制)
+
+结构 60 分(静态)+ 效果 40 分(实测,含子 agent 跑测试 prompt)。详见 `.claude/skills/darwin-skill/SKILL.md` Rubric 节。
+
+### 4. 棘轮纪律(与本项目门禁哲学一致)
+
+- 改进后总分必须**严格高于**改进前才保留
+- 退步 → 自动 `git revert`,不留烂代码
+- 不能跑子 agent 时降级 `dry_run` 标注,**不静默跳过**
+- 优化后 SKILL.md ≤ 原文 150% 体积,防膨胀
+
+### 5. 与 AgentChat 的关系
+
+darwin-skill 跑出的改进建议**不绕过协作协议**——重大改动(如 test-lead skill 本身)需走讨论触发,由 test-lead 协调 review 后再合入。
+
+### 6. 不做的事(Via Negativa 显式标注)
+
+V1.0.0 darwin-skill **不消费**项目运行数据(即 `discussions/`、`decisions/`、`history/` 等运行历史;它自身写入的 `skill-evolution/results.tsv` 除外),仅对 skill 文本结构本身做静态 + 实测评分优化。
+
+**为什么不做"运行数据反哺 skill"的自学习闭环**:
+1. 自学习难界定何时停止学习"坏样本"(如一段时期的高 flaky 反而被学进 skill 形成自我固化)
+2. 数据驱动的 skill 改动违反"棘轮 + 人在回路"哲学——人类失去 review 节点
+3. 第三公理"不可测之物必须被命名"——我们不假装能让 skill 自动学会"质量直觉"
+
+**未来开案条件**:若需要开放自学习能力,须由 test-lead + 独立伦理责任人**双签**立项,且必须包含:(a) 数据筛选规则 (b) 学习棘轮阈值 (c) 人类否决通道。**当前路线图不承诺。**
+
+---
+
+## 🐛 Bug Tracker 多适配器
+
+> 禅道是默认,但不是唯一。bug-manager agent 通过 `BugTrackerBase` 抽象层接 5 套适配器,由 `.env` 的 `BUG_TRACKER` 字段选择。
+
+### 1. 适配器矩阵
+
+| 适配器 | 状态 | 配置字段 | severity 映射 |
+|--------|------|---------|--------------|
+| **zentao**(默认) | ✅ V1.0.0 | `ZENTAO_URL / ZENTAO_USER / ZENTAO_TOKEN` | severity 1=P0 / 2=P1 / 3=P2 / 4=P3 |
+| **jira** | ✅ V1.0.0 | `JIRA_URL / JIRA_USER / JIRA_TOKEN / JIRA_PROJECT_KEY` | Highest=P0 / High=P1 / Medium=P2 / Low=P3 |
+| **github** | ✅ V1.0.0 | `GITHUB_TOKEN / GITHUB_REPO` | label `priority:p0..p3` |
+| **linear** | ✅ V1.0.0 | `LINEAR_API_KEY / LINEAR_TEAM_ID` | priority 1=P0 / 2=P1 / 3=P2 / 4=P3 |
+| **webhook** | ✅ V1.0.0 | `BUG_WEBHOOK_URL`(POST JSON) | 调用方自定义 |
+
+### 2. 切换方式
+
+```bash
+# .env
+BUG_TRACKER=jira # zentao / jira / github / linear / webhook
+JIRA_URL=https://yourorg.atlassian.net
+JIRA_USER=qa@yourorg.com
+JIRA_TOKEN=xxx
+JIRA_PROJECT_KEY=QA
+```
+
+`utils/bug_manager.create_bug_manager()` 工厂函数读取 `BUG_TRACKER` 实例化对应 adapter,bug-manager agent 代码不变。
+
+### 3. 统一契约(所有 adapter 必须实现)
+
+```python
+class BugTrackerBase:
+ def submit_bug(title, description, severity, attachments, reproduce_steps) -> bug_id
+ def get_status(bug_id) -> {status, assignee, severity, last_updated}
+ def add_comment(bug_id, comment, attachments)
+ def link_testcase(bug_id, testcase_id)
+ def query_open_bugs(filters) -> list[bug]
+```
+
+不实现 = 不能注册为 adapter。所有 adapter 走同一 severity 映射表(`utils/bug_severity_map.py`),保证跨 tracker 的 P0/P1 语义一致。
+
+### 4. 多 tracker 并存(罕见场景)
+
+允许同时启用多个:例如 GitHub Issues 走开源贡献者反馈、禅道走内部 QA。配置 `BUG_TRACKER=github,zentao`,bug-manager 按 Bug 标签路由。
+
+---
+
+## 📦 按需安装与依赖分层
+
+> install.sh 不再一次性装全。**用户选了什么形态,才装什么依赖**——避免 mobile 用户被强装 desktop 工具,反之亦然。
+
+### 1. 依赖七层划分(base 必装 + 6 个可选层)
+
+| 层 | requirements 文件 | 触发条件 | 关键包 |
+|----|----------------|---------|--------|
+| **base**(必装) | `requirements/base.txt` | 永远装 | pytest / requests / playwright / faker / openpyxl / python-docx / allure-pytest |
+| **mobile** | `requirements/mobile.txt` | 选择 mobile / mini-program | Appium-Python-Client / 微信开发者 CLI(外部) |
+| **desktop** | `requirements/desktop.txt` | 选择 desktop | pywinauto / uiautomation / PyAutoGUI |
+| **visual** | `requirements/visual.txt` | 选择 game / visual-regression | airtest / opencv-python / pytesseract |
+| **system** | `requirements/system.txt` | 选择 IoT / 音视频 / blockchain | paho-mqtt / pyserial / web3 / kafka-python / ffmpeg-python |
+| **ai** | `requirements/ai.txt` | 选择 AI / LLM 测试 | scikit-learn / scipy + LLM eval lib |
+| **perf**(推荐装) | `requirements/perf.txt` | 选择性能测试 | locust(JMeter 走外部 Java,不进 pip) |
+
+### 2. install.sh 交互流程
+
+```bash
+$ bash install.sh /path/to/your-test-project
+
+[1/5] 检测 Python / Java / Node 环境...
+[2/5] 选择你要测试的产品形态(多选,空格分隔):
+ 1) Web + API(base,默认必选)
+ 2) Mobile(Android / iOS / 小程序)
+ 3) Desktop(Win / Mac / Linux GUI / Electron)
+ 4) Visual / Game / OCR
+ 5) System / IoT / 音视频 / Blockchain
+ 6) AI / LLM 模型
+ 7) Performance(JMeter 主 + Locust 备)
+> 1 2 7 ← 用户输入
+
+[3/5] 将安装:base + mobile + perf 三层
+[4/5] 装 Python 依赖...(仅装上述三层)
+[5/5] 装 Playwright browsers / Appium(按选择装)
+完成。可用 skills:core 8 + mobile-test(其他平台 skill 不装)
+```
+
+### 3. agent / skill 级依赖元数据
+
+每个 agent .md / skill .md 头部 frontmatter 声明依赖层:
+
+```yaml
+---
+name: mobile-tester
+requires_layer: [base, mobile]
+optional_layer: [visual] # 跨平台时按需
+---
+```
+
+install.sh 反向计算:用户选了哪些 skill / agent → 自动算出最小必装层并集。
+
+### 4. 后期补装
+
+```bash
+$ bash install.sh --add visual,ai
+```
+
+不重装 base,只增量补 visual / ai。dependency 冲突走 `pip install --upgrade-strategy only-if-needed` 防止已稳定包被改版本。
+
+### 5. 验收(对应闭环约定第 14/15 条)
+
+- 装完跑 `pytest --collect-only` 必须 0 错误
+- 装完跑 `python -c "import utils.<对应层>"` 全模块必须 import 通过
+- 不影响已有 workspace/.env
+
+### 6. 运行时按需补装(agent / skill 入口自检)
+
+> 装机时未选的层,**运行时仍可触发** —— 不强迫用户重新跑 install.sh,但也不静默自动装。
+
+**自检与补装回路**(5 步):
+
+1. **依赖自检**:agent / skill 启动时读取自身 frontmatter `requires_layer`,与已装层并集对比
+2. **缺则反问**:缺失则停下反问,列层级 + 关键包 + 预估安装时间 + 影响范围
+
+ > 示例:"`/visual-test` 需要 visual 层(airtest + opencv-python + pytesseract,约 80MB / 2-5 分钟)。现在补装?(Y/n)"
+3. **触发补装**:用户同意 → 调 `install.sh --add visual` → 增量补装
+4. **落档**:补装请求 + 用户决定 + 时间戳 → `workspace/执行日志/discussions/{date}_dependency-asks.md`
+5. **拒绝处置**:用户拒绝 → agent / skill 降级(如可降级,例如 `/visual-test` 退化为纯 pytest)或拒绝执行并落 `decisions/`,**不静默继续假装能跑**
+
+**为什么不静默自动装**:跨平台环境差异大(特别是 system 层涉及系统级工具 Java / Node / FFmpeg),强行装可能污染用户环境。符合「Agent 能力越强谦卑义务越重」公理。
+
+**用户配置一站式清单**(首次部署后必查):
+
+| 配置项 | 文件 | 必填字段 |
+|--------|------|---------|
+| 被测系统 | `.env` | `TEST_APP_URL` / `APP_SRC_PATH` / `TEST_DB_*` |
+| Bug Tracker | `.env` | `BUG_TRACKER` + 对应 adapter 字段(`ZENTAO_*` / `JIRA_*` / `GITHUB_*` / `LINEAR_*` / `BUG_WEBHOOK_URL`) |
+| 三端通知 | `.env` | `WECHAT_WEBHOOK_URL` / `FEISHU_WEBHOOK_URL` / `DINGTALK_WEBHOOK_URL`(至少一个) |
+| 性能门禁 | `utils/jmeter_result_parser.py::DEFAULT_GATES_*` | 阈值微调 |
+| 功能门禁 | `utils/ci_quality_gate.py::GATES` | 阈值微调 |
+| 回归范围 | `workspace/regression_modules.yaml` | 模块白名单 |
+| CI/CD | `.github/workflows/test.yml` 或 `Jenkinsfile` | secrets 注入 |
+
+---
+
+## 🏭 行业适配参照表(领域先行)
+
+> 进入新行业前,先建立该行业的「核心测试视角 + 必测维度 + 绝对约束」档案——比直接堆测试用例更高效。
+> **接入流程**:项目接入新行业第一周,由 test-lead + requirements-analyst 起草《领域档案》并由领域专家签字。**档案优先级高于通用门禁**——冲突时领域档案优先。
+
+| 行业 | 核心测试视角 | 必测维度 | 绝对约束(红线) |
+|------|------------|---------|---------------|
+| 金融 / 支付 | 一致性(守恒) | 账目守恒、事务幂等、对账闭环、并发竞态 | 测试数据不得污染生产账本 |
+| 医疗 / 健康 | 可逆性 + 隐私 | 操作回退路径、数据加密、HIPAA 审计追溯 | 不得接触真实患者可识别数据 |
+| 法律 / 治理 | 可解释性(边界) | 决策日志、规则版本快照、人工复核入口 | 不得替代人类终审判决 |
+| 教育 | 安全边界(脚手架) | 内容过滤、行为日志、家长可监督接口 | 未成年人数据合规(不入训练集) |
+| 农业 / 生态 | 韧性(共振) | 离线降级、传感器冗余、传统流程兼容 | 不得单点 AI 依赖(断网必须可降级) |
+| 艺术 / 创作 | 输出多样性(涌现) | 风格多样性指标、原创性检测、版权审计 | 不得审查合法表达 |
+| 自动驾驶 / 机器人 | 物理承诺 | ODD 边界、降级路径、人工接管延迟、HIL 闭环 | 决策可追溯到人类设计意图 |
+| 公共卫生紧急 | 时效 + 基线 | 紧急模式切换、回归正常态、数据滞后告警 | 不得永久化"紧急豁免"放宽门禁 |
+| 司法 / 证据链 | 可采信性 | 完整审计链、版本哈希、原始证据保留 | 不得作为唯一定罪依据 |
+| 宗教 / 纪念 | 神圣 | 经文语序、仪式节奏不被最优化碾压 | 不亵渎信仰边界 |
+| 跨语言 / 跨文化 | i18n 严格性 | 多语言准确性、RTL 布局、禁忌词/色/数过滤 | 不得修改宗教/法律原文 |
+
+**使用方式**:表中每行可作为接入新业务的清单起点。"必测维度"→ 可执行测试集;"绝对约束"→ CI 门禁加 fail-fast 检查。
+
+---
+
+## 🏗️ 架构图(运行时)
+
+```text
+┌────────────────────────────────────────────────────────────────┐
+│ test-lead(协调者) │
+│ 全局调度 / 质量门禁 / 风险决策 / 基线管理 │
+└────────────────────────────────────────────────────────────────┘
+ │
+ ┌────────┴───────────────┐
+ ↓ ↓
+[requirements-analyst] [testcase-designer]
+ │ │
+ └────────┬───────────────┘
+ ↓
+ [env-manager] ──→ 串行(基础 connectivity 通过后)──→ [data-preparer]
+ │ │
+ └───────────────────┬──────────────────────────┘
+ ↓
+ [automation-engineer]
+ pytest 脚本 + /jmeter-script-gen → JMX
+ ↓
+ /smoke-test(门禁 95%)
+ ↓ 通过
+ [test-executor]
+ 功能回归(P0+P1)
+ ↓ 通过
+ [test-executor]
+ JMeter 性能(ci_quick / full)
+ ↓
+ [bug-manager]
+ ↓
+ [report-generator]
+ Allure + JMeter HTML + Word + 三端通知
+ ↓
+ test-lead 最终决策
+```
+
+---
+
+## ⚡ 快速开始
+
+### 1. GitHub 一键部署(最快)
+
+```bash
+# Mac / Linux 一行远程部署
+curl -fsSL https://raw.githubusercontent.com/Wool-xing/Test-Agent/main/install.sh | bash -s -- /path/to/your-test-project
+
+# 或先 clone 再本地跑
+git clone https://github.com/Wool-xing/Test-Agent.git
+bash Test-Agent/install.sh /path/to/your-test-project
+```
+
+> 默认仓库为 `Wool-xing/Test-Agent`。fork 后将路径替换为你自己用户名(或用 `TEST_AGENT_REPO_URL` 环境变量覆盖)。Windows / 手动方式见 `docs/getting-started/部署说明.md`。
+
+`install.sh` 自动完成:克隆模板 → 装 Claude Code → 建目录 → 拷贝全部文件 → 装 Python 依赖 + Playwright。
+
+### 2. 配置 .env(敏感信息)
+
+```bash
+cd your-test-project
+cp .env.example .env
+# 编辑 .env,填入 TEST_APP_URL / TEST_DB_* / BUG_TRACKER + 对应字段 / WECHAT_WEBHOOK_URL 等
+```
+
+### 3. 启动 Claude Code
+
+```bash
+cd your-test-project
+claude
+```
+
+### 4. 在 Claude Code 提示符使用斜杠技能
+
+```text
+> /smoke-test # 10 分钟 P0 冒烟
+> /test-coordinator # 完整流程
+> /regression-test # 回归 + JMeter
+> /testcase-design # 仅生成用例 Excel
+> /python-script-gen # 生成 pytest 脚本
+> /jmeter-script-gen # 生成 JMeter JMX
+> /data-preparation # 测试数据 + JMeter CSV
+> /bug-submission # 按 BUG_TRACKER 路由提交 Bug
+```
+
+或自然语言:
+
+```text
+> 帮我对用户登录功能进行完整测试。需求:手机号+密码登录,记住密码,
+> 连续失败 5 次锁定 30 分钟。
+```
+
+> 注:`>` 后面是 Claude Code 提示符的输入(斜杠技能或自然语言),**不是 shell 命令**。
+
+详细启动指引(含 Java/JMeter/Allure 安装、.env 必填、首次跑通验证)→ `docs/getting-started/使用手册.md` 顶部「🚀 启动指引」章节。
+
+---
+
+## 📋 工作流选择指南
+
+| 场景 | 推荐工作流 | 耗时 | 用例范围 | 触发 |
+|------|-----------|------|---------|------|
+| 上线前快速验证 | `/smoke-test` | ~10 分钟 | P0 | 手动 / CI |
+| 新功能完整测试 | `/test-coordinator` | ~2-4 小时 | 全部 | 手动 |
+| 迭代后回归 | `/regression-test` | ~1-2 小时 | P0+P1 | CI 自动 |
+| 数据准备 | `/data-preparation` | ~5 分钟 | - | 测试前自动 |
+| Bug 提交 | `/bug-submission` | ~2 分钟/个 | - | 失败后 |
+
+---
+
+## 🔧 技术栈速查
+
+| 类型 | 框架/工具 | 版本 | 说明 |
+|------|-----------|------|------|
+| 接口测试 | requests + pytest + allure-pytest | pytest 7.4.3 | |
+| UI 测试 | playwright + pytest-playwright | playwright 1.40.0 | |
+| 性能测试(主) | Apache JMeter | 5.6.3(需独立装 Java + JMeter) | CI/release 门禁权威 |
+| 性能测试(备) | locust | 2.25.0 | 开发期 Python 内压测 |
+| 测试数据 | faker + factory-boy | 20.x + 3.3.0 | utils/data_factory |
+| 覆盖率 | pytest-cov | 4.1.0 | cov 指向 $APP_SRC_PATH |
+| 并行执行 | pytest-xdist | 3.5.0 | 默认 4 进程 |
+| 失败重试 | pytest-rerunfailures | 13.0 | 命令行显式开启 |
+| Mock | pytest-mock | 3.12.0 | unittest.mock 包装 |
+| 配置 | PyYAML | 6.0.1 | regression_modules.yaml |
+| Excel | openpyxl | 3.1.2 | utils/excel_generator |
+| Word 报告 | python-docx | 1.1.0 | utils/generate_report |
+| Bug 管理 | 5 adapter 工厂(禅道 默认 / Jira / GitHub Issues / Linear / Webhook) | - | utils/bug_manager + bug_tracker_*.py,由 `.env BUG_TRACKER` 选择 |
+| 通知 | webhook curl 直连 | - | utils/generate_report.send_*(企微/飞书/钉钉) |
+| 重试 | tenacity / 自实现 | 8.2.3 | utils/api_retry_util(10/20/40s) |
+| AI 模型 | Claude 4.x 系列 | Opus 4.7 / Sonnet 4.6 | Claude Code 默认管理 |
+| MCP | filesystem | npm @modelcontextprotocol | 仅启用 filesystem |
+
+---
+
+## 🔐 闭环约定(设计原则)
+
+1. **数据**:测试数据落 `workspace/测试数据/test_data.json`(conftest fixture 直接消费)
+2. **覆盖率**:cov 指向 `$APP_SRC_PATH`(被测系统源码,不是测试脚本本身)
+3. **重试策略**:全栈统一 10/20/40s(指数退避),由 `utils/api_retry_util.call_with_retry` 提供
+4. **severity/pri 映射**:1=P0 / 2=P1 / 3=P2 / 4=P3,由 `utils/bug_severity_map.py` 权威(跨 5 adapter 一致)
+5. **error_rate 单位**:百分比 pct(字段名 `_pct` 后缀),全栈一致
+6. **基线管理**:仅 release 分支 + full 模式 + 全门禁 PASS 才更新 `perf_baseline.json`
+7. **门禁分层**:smoke / regression / performance_full / performance_ci_quick,由 `utils/ci_quality_gate.py` 与 `utils/jmeter_result_parser.py` 统一实现
+8. **MCP 通道**:当前仅 filesystem。通知/Bug 走 SDK 直连,4 个自定义 mcp_server(zentao/wechat/feishu/dingtalk)按需后续实现
+9. **prod 环境**:`get_current_env()` 直接 raise,禁止误测生产
+10. **Flaky 与 reruns**:冒烟阶段不开 reruns(保留 flaky 信号),回归阶段开 reruns(快速反馈),flaky 由 history 离线归档检测
+11. **不可妥协边界(铭文锁死)**:本文档首节"五条铭文"具有最高优先级。当工程优化与伦理边界冲突,工程让步;当门禁阈值与公共利益冲突,门禁让步。**V1.0.0 阶段铭文不允许任何削弱**。开放双签机制的触发条件——(a) 团队 ≥ 20 人 **且** 已任命独立伦理责任人(不得由 test-lead 兼任);或 (b) 接入金融/医疗/司法合规行业并通过领域专家签字的伦理审查。**单一签字(包括 test-lead 兼任)不构成有效授权**
+12. **决策可追溯**:任何"放行 / 拒绝"判断必须留可回放证据——`workspace/执行日志/decisions/{date}_{decision_id}.json` 含输入快照 + 模型版本 + 阈值版本 + 判断结论 + 理由文本。归档不可删,仅可标 deprecated
+13. **三筐分类纪律**:所有候选测试项强制分类 Yes / No / **Too Hard**。第三筐显式归档不丢弃——"承认存在、暂不假装能测"比"沉默忽略"诚实
+14. **修改验证铁律**:任何 utils / agent / skill / config 文件改动,必须通过 **四关**:
+ - (a) `pytest --collect-only` 0 错误
+ - (b) 相关单测全过
+ - (c) `python -m utils` smoke import 通过
+ - (d) **下游 import 兼容性扫描**:`python -c "import utils"` 与 `python -c "from utils import *"` 验证改动未打断其他模块的依赖路径;如改 `utils/X.py`,额外跑 `grep -rn "from utils.X\|import utils.X" .` 列出依赖者并逐一 import 通过
+
+ 四关不全过不许 commit
+15. **工具兼容铁律**:新增依赖前先跑 `pip install --dry-run` 检查冲突;新 utils 必须不破坏既有 import 路径;agent / skill frontmatter 改动需通过 `darwin-skill` baseline 评分不下降
+16. **协作纪要不可删**:所有 AgentChat 讨论、反问、跨 agent 协调全部落 `workspace/执行日志/discussions/`,归档只可标 deprecated 不可删——讨论过程是审计证据链的一部分
+17. **自进化棘轮**:skill 改动后 darwin 评分必须 ≥ 原分;退步则强制 `git revert`;不能跑实测时降级 `dry_run` 标注,不静默跳过
+18. **依赖运行时补装铁律**:agent / skill 缺依赖时必须反问用户(带补装命令 + 影响范围),不静默自动装、不静默跳过、不假装能跑——见「📦 按需安装」§6
+
+---
+
+## 📂 部署后目录速览
+
+```text
+your-test-project/
+├── .claude/{agents,skills}/ ← 14 agent + 14 skill(13 业务 + darwin-skill)
+├── .github/workflows/test.yml
+├── Jenkinsfile
+├── utils/ ← 49 个 .py + __init__
+├── src/ ← 被测系统源码(cov 指向)
+├── workspace/
+│ ├── 测试计划/ 需求分析/ 测试用例/ 测试数据/
+│ ├── 自动化脚本/python/ jmeter/
+│ ├── regression_modules.yaml ← 回归范围配置(可选)
+│ └── 执行日志/
+│ ├── allure-results/ allure-report/
+│ ├── jmeter-results/ jmeter-report/
+│ ├── coverage.xml coverage-report/
+│ ├── baselines/perf_baseline.json
+│ ├── history/ ← junit-xml 归档
+│ ├── discussions/ ← AgentChat 讨论纪要 + 反问澄清 + 依赖补装记录
+│ ├── decisions/ ← 放行/拒绝决策日志(闭环约定 12)
+│ ├── skill-evolution/ ← darwin-skill results.tsv + 成果卡片
+│ ├── 截图/ 报告/
+├── conftest.py / pytest.ini / requirements.txt
+├── .mcp.json / .env
+```
+
+---
+
+## 🛠️ 升级 / 回滚 / 卸载
+
+详见 `docs/getting-started/部署说明.md` "升级 / 回滚 / 卸载" 章节。
+
+升级会覆盖:`.claude/agents/`、`.claude/skills/`、`utils/`、`conftest.py`、`pytest.ini`、`requirements.txt`、`.mcp.json`、`.github/workflows/test.yml`、`Jenkinsfile`。
+不会覆盖:`.env`、`workspace/`、`src/`。
+
+---
+
+## 🤝 协作与反馈
+
+- 文档结构、Bug 反馈:在仓库内提 issue
+- 功能扩展:先在 `agents/` 加 agent / `skills/` 加 skill,详见 `CONTRIBUTING.md`
+- 改动 `utils/` 时同步更新 `config/requirements.txt` 与 `ci/` 中的引用
+
+---
+
+## 🔄 跨 AI 工具兼容性
+
+**Claude Code 是默认 / 推荐 runtime,但本项目不强制绑定**。
+
+| 组件 | Claude Code 依赖 | 跨工具适配 |
+|------|----------------|----------|
+| `.claude/agents/*.md`(YAML frontmatter) | ✅ Claude Code spec | Cursor 用 `.cursorrules`;Continue.dev 用 `.continue/`;通用 LLM 拼接为 system prompt |
+| `.claude/skills/*.md`(斜杠技能) | ✅ Claude Code 独有 | 其他工具无对等机制 |
+| `.mcp.json`(MCP 协议) | 半依赖 | MCP 是开放协议;Claude Desktop / Cursor 部分支持;OpenAI 系也开始支持 |
+| `Agent` 工具(test-lead 调用子专家) | ✅ Claude Code 独有 | 其他工具用人工编排 / 多 agent 框架替代 |
+| `utils/*.py`(49 个,含 `__init__.py`) | ❌ 纯 Python | 跨工具完全可用 |
+| pytest / Playwright / JMeter / Allure | ❌ 跨工具 | 完全可用 |
+| CI/CD(yml / groovy) | ❌ 跨工具 | 完全可用 |
+| conftest.py / .env / requirements.txt | ❌ 标准 Python | 完全可用 |
+
+### 迁移成本
+
+- **工程链零改动**:utils + pytest + JMeter + CI 完全跨工具
+- **agent / skill 文档需重写**:迁移到 Cursor / Continue / 其他工具的对应格式
+- **失去**:Claude Code skill 自动加载、Agent tool 子专家协调、`.claude/` 目录约定
+
+### 模型选择
+
+- README 中 Claude 4.x(Opus 4.7 / Sonnet 4.6 / Haiku 4.5)是**推荐**而非强制
+- 项目代码本身**不调用任何 LLM API**(utils 全是工具代码)
+- 模型由 Claude Code 账户级管理:`claude /login` + `/model` 切换
+- 用其他 AI 工具时按其规范选模型即可
+
+---
+
+## 🏗️ 测试架构合理性深度(金字塔 / 左移 / 右移 / 可观测 / 门禁)
+
+> 本节是项目方法论核心。回答:"为什么这套架构合理?" "全球顶尖测试团队怎么看?"
+
+### 1. 测试金字塔 2024 现代版
+
+**经典金字塔**(Mike Cohn 2009):单元 70% / 集成 20% / E2E 10%。
+
+**2024 现代调整**(Google Testing Blog / Microsoft Engineering Fundamentals 综合):
+
+```text
+ ┌─────────────────────┐
+ │ E2E / 视觉回归 10% │ ← Playwright / Appium / Airtest(慢但必要)
+ ├─────────────────────┤
+ │ 系统/契约 20% │ ← API + 服务间 + Pact + jsonschema + Mock
+ ├─────────────────────┤
+ │ 集成/组件 30% │ ← pytest + pytest-mock + WireMock
+ ├─────────────────────┤
+ │ 单元 40% │ ← pytest(秒级反馈,含变异测试)
+ └─────────────────────┘
+ ↑
+ 变异测试(mutation_runner)反向验证用例有效性
+```
+
+**与经典模型差异**:
+- **不再 70%/20%/10% 一刀切**,按"变更频率 + 阻塞代价"重新分布
+- 单元层增加变异测试 — 用例有效性必须可量化(不只覆盖率)
+- 契约层独立成层(Pact/jsonschema/openapi_test_gen)— 微服务时代必备
+- 视觉回归归 E2E 层(不另设层)— SSIM/OCR 与 E2E 一同 owner
+
+**Test-Agent 落地**:
+- 单元:`pytest + pytest-mock`(项目自身 utils 层 Phase 2 补齐自测)
+- 集成:`pytest` 内嵌 + `wiremock 3.3.1` Mock Server
+- 契约:`utils/contract_test.py` (Pact + jsonschema) + `utils/openapi_test_gen.py`
+- E2E:`Playwright`(Web/Electron)+ `Appium`(移动)+ `Airtest`(视觉)
+- 变异:`utils/mutation_runner.py`(mutmut)
+
+### 2. Shift-Left(左移)— 测试介入越早越便宜
+
+**Boehm 法则**:缺陷修复成本随开发阶段呈指数增长(需求 1× → 设计 5× → 编码 10× → 测试 50× → 生产 200×)。
+
+**Shift-Left 实施层级**(从最早到最晚):
+
+| 层 | 介入点 | 工具 / utils | 阻断力 |
+|----|--------|------------|--------|
+| L1 | **需求阶段** | `requirements-analyst` 双轨输出(MD + JSON)+ 风险矩阵 | 弱(评审) |
+| L2 | **设计阶段** | `testcase-designer` 等价类/边界值/状态迁移/配对测试 + 风险矩阵 | 弱(评审) |
+| L3 | **IDE 编码时** | ruff + mypy + IDE 实时提示 | 强(编辑器红线) |
+| L4 | **commit 前 (pre-commit)** | gitleaks + ruff + private-source 防护 + .env 防护 + 14/14/49 文件统计 | 强(阻断 commit) |
+| L5 | **PR gate** | CodeQL + pip-audit + safety + ci.yml 全套 | 强(阻断合入) |
+| L6 | **静态 / 动态安全分析** | Bandit(Python SAST)+ ZAP/Burp Pro(DAST) | 中(发现/修) |
+| L7 | **契约测试** | `utils/contract_test.py` consumer-side / provider-side | 强(CI 阻断;PR 自动阻断链路待 Phase 2 串通) |
+
+**Test-Agent 现状评估**:L1-L5 已串通;L6 在 utils 已有 `security_scanner.py`;L7 utils 存在但未串成"自动 PR 阻断"链路。
+
+**Phase 2 收尾点**:把 L7 契约测试串成"PR 改了 OpenAPI spec → 自动跑 contract → 不通过阻断合入"链路。
+
+### 3. Shift-Right(右移)— 生产即测试环境
+
+**核心理念**:测试不止于发布前;通过生产监测 + 安全发布机制 + 主动故障注入持续验证质量。
+
+**Shift-Right 实施层级**:
+
+| 层 | 机制 | 工具 / utils | Test-Agent 状态 |
+|----|------|------------|----------------|
+| R1 | **合成监控**(Synthetic Monitoring) | 定时跑核心路径(登录/下单),24h 覆盖 | ⚪ 路线图 Phase 3 加 `utils/synthetic_monitor.py` |
+| R2 | **真实用户监测**(RUM) | Web Vitals 上报 + 前端错误堆栈 | ✅ `utils/web_vitals_collector.py`(采集端) |
+| R3 | **链路追踪**(Distributed Tracing) | Jaeger / Zipkin + traceID 业务断言 | ✅ `utils/tracing_validator.py` |
+| R4 | **金丝雀发布**(Canary)+ **特性开关**(Feature Flag) | 渐进放量 + 回滚阀 | ⚪ 路线图 Phase 3 加 `utils/canary_runner.py` + `feature_flag_validator.py` |
+| R5 | **混沌工程**(Chaos Engineering) | 主动注入 CPU/内存/磁盘/网络/进程/k8s 故障 | ✅ `utils/chaos_helper.py` |
+| R6 | **灾备演练**(Failover Drill) | 主动 kill-pod + 数据一致性校验 | ✅ `utils/chaos_helper.kill_pod` |
+| R7 | **A/B 测试**(Experimentation) | 多版本流量切分验证 | ⚪ 业务方按需自加 |
+| R8 | **DORA 4 指标**(部署频率 / Lead Time / 失败率 / MTTR) | DevOps 健康度量 | ✅ `utils/dora_metrics.py` |
+| R9 | **SLO/错误预算** | SLI 阈值 + 错误预算燃烧率 | ✅ `utils/slo_validator.py` |
+
+**Phase 3 收尾点**:补 R1(合成监控)+ R4(canary/feature flag),完成 Shift-Right 闭环。
+
+### 4. 可观测性(Observability)三柱 + 测试可视化
+
+**三柱**(OpenTelemetry 标准):
+- **Traces**(链路):`utils/tracing_validator.py`
+- **Metrics**(指标):JMeter result + DORA + flaky rate
+- **Logs**(日志):pytest log + logcat / iOS syslog(mobile_driver)+ 系统日志(desktop_driver)
+
+**测试侧可观测**(独立于业务可观测性):
+
+| 维度 | 数据源 | 现状 | 可视化目标 |
+|------|--------|------|----------|
+| 用例通过率 | junit-xml | ✅ Allure | Allure 报告 |
+| 覆盖率 | coverage.xml | ✅ pytest-cov HTML | 覆盖率 HTML |
+| 性能基线 | jmeter-results/result.jtl | ✅ JMeter HTML + baseline.json | JMeter HTML |
+| Flaky 率 | history/junit-xml | ✅ flaky_detector | ⚪ 缺统一仪表盘 |
+| DORA 4 指标 | git log + 缺陷库 | ✅ dora_metrics.py | ⚪ 缺统一仪表盘 |
+| 缺陷密度/逃逸率/重开率 | bug tracker | ✅ bug-manager 内嵌 | ⚪ 缺统一仪表盘 |
+| 用例减重信号 | 覆盖率 + Jaccard | ✅ suite_minimizer | ⚪ 报告内嵌 |
+| 变异分数 | mutmut | ✅ mutation_runner | ⚪ 报告内嵌 |
+
+**Phase 3 收尾点**:整合 flaky/DORA/缺陷密度/变异分数到统一 dashboard(Grafana 或 静态 HTML)。
+
+### 5. 质量门禁分层(Layered Quality Gates)
+
+**为什么分层**:一刀切门禁要么过严卡死开发节奏,要么过松形同虚设。分层 = 不同阶段不同严苛度。
+
+**Test-Agent 五层门禁**:
+
+| 层 | 触发 | 关键阈值 | 不达标处置 | 实现 |
+|----|------|---------|----------|------|
+| **smoke** | 每次 commit/PR | P0 通过率 ≥95% + 0 新 P0 Bug + API ≤3000ms | 阻断后续 | `utils/ci_quality_gate.py::GATES['smoke']` |
+| **regression** | merge 到 main / develop | P0=100% / P1≥95% / 总体≥90% / cov ≥80% / Flaky <5% | 评估遗留风险 | `utils/ci_quality_gate.py::GATES['regression_p0_p1']` |
+| **performance_ci_quick** | CI 默认(5 并发) | TPS≥20 / P95≤800ms / err <1% | 警告不阻 | `utils/jmeter_result_parser.DEFAULT_GATES_CI_QUICK` |
+| **performance_full** | release/* 分支 + 手动(50 并发) | TPS≥100 / P95≤500ms / 基线回归 <20% | 阻断 release | `utils/jmeter_result_parser.DEFAULT_GATES_FULL` |
+| **release** | 上线前 | 上述全 PASS + bug-manager 审批 + test-lead 决策 | 不上线 | `agents/01-测试主管.md::上线决策` |
+
+**门禁可配置性**:阈值集中在 `utils/ci_quality_gate.py::GATES` + `utils/jmeter_result_parser.py::DEFAULT_GATES_*`。Phase 2 抽 `quality_gate_engine.py` + yaml 驱动,让用户改阈值不需改代码。
+
+**Flaky vs Reruns 设计哲学**:
+- **冒烟阶段**:不开 reruns,**保留 flaky 信号**(Flaky 是质量问题,不是网络问题)
+- **回归阶段**:开 reruns(`--reruns=2 --reruns-delay=5`),**追求快反馈**
+- **Flaky 检测**:`utils/flaky_detector.py` 离线扫 history,失败率 >30% 标 quarantine
+- **Quarantined 用例**:单独 marker `@flaky`,不计入门禁,每周清理
+
+### 6. 调整路径(Phase 触发条件 + 关键交付)
+
+> **不绑月份编号**——按触发条件推进,不按日历推进。"写进路线图就是承诺",三年后回看不打脸的承诺,才写。
+
+**Phase 触发条件总表**:
+
+| Phase | 触发条件 | 标志性交付 |
+|------|---------|----------|
+| **Phase 1**(当前 V1.0.0) | 概念宪章成 + 工程基线就绪 | 14 agent + 14 skill + AgentChat + Bug 多适配 + 按需安装 + darwin-skill 集成 |
+| **Phase 2** | utils 单测覆盖 ≥ 60% 且团队 ≥ 5 人 | 契约链路串通 + 门禁引擎 yaml 抽象 + 反问 KB 重新评估 |
+| **Phase 3** | Phase 2 全交付 + 接入 ≥ 2 行业 | 合成监控 + canary/feature flag + 统一 dashboard + 沉默故障 + 缺席者注入 |
+| **Phase 4** | 接入合规行业(金融/医疗/司法)| 证据链司法可采信打包 + 数字考古学家 + AI 测试深化 |
+| **Phase 5** | 多语种多文化接入需求 | 神圣性守护 + 禁忌矩阵 + 跨文化 i18n |
+
+**当前路线图详表**:
+
+| 维度 | 现状 | 落点 Phase | 关键交付 |
+|------|------|-----------|---------|
+| **金字塔单元层** | 弱(utils 自身无测试) | Phase 2 | `tests/test_utils_*.py` 全覆盖 + 变异测试反向用 |
+| **Shift-Left L7 契约链路** | utils 雏形未串通 | Phase 2 | OpenAPI 改动 → contract → PR 阻断 |
+| **门禁引擎抽象** | 阈值写死代码 | Phase 2 | `utils/quality_gate_engine.py` + yaml 驱动 |
+| **Shift-Right R1 合成监控** | 缺 | Phase 3 | `utils/synthetic_monitor.py` |
+| **Shift-Right R4 canary + feature flag** | 缺 | Phase 3 | `utils/canary_runner.py` + `feature_flag_validator.py` |
+| **可观测统一 dashboard** | 散落 HTML 报告 | Phase 3 | DORA + 缺陷密度 + flaky + 变异分数 → Grafana / 静态 HTML 模板 |
+| **伦理 / 偏见审计** | 散落 utils | Phase 3 | 数据集偏差扫描 + 决策公平性指标 + 偏见门禁 |
+| **沉默故障检测** | 缺 | Phase 3 | tracing 阈值漂移 + 无报警恶化检测器 |
+| **缺席者场景注入** | a11y/i18n 已有 | Phase 3 | 边缘场景剧本库(残障/老年/未成年/未联网/精神危机) |
+| **AI 测试深化** | 漂移 + LLM eval | Phase 4 | + prompt 版本回归 + RAG 召回精度 + token 成本门禁 + hallucination rate |
+| **证据链 / 司法可采信打包** | 散落 | Phase 4 | 决策日志 + 模型版本 + 数据集 → 标准送审包 |
+| **数字考古学家**(遗留系统初始假设回溯) | 缺 | Phase 4 | 知识图谱冷启动 + Why 数据库 |
+| **神圣性守护 + 禁忌矩阵** | 缺 | Phase 5 | 跨文化禁忌词/色/数/节日组合(本地化共建) |
+| **darwin-skill 集成(自进化)** | ✅ V1.0.0 已并入 | Phase 1 | 上游 SKILL.md + workspace 落 results.tsv + 季度同步 |
+| **Bug Tracker 多适配** | ✅ V1.0.0 已并入 | Phase 1 | 5 套适配器(zentao/jira/github/linear/webhook)+ 工厂模式 |
+| **AgentChat 协作协议** | ✅ V1.0.0 已并入 | Phase 1 | discussions/ 纪要 + test-lead 中枢路由 + 反问 3 级预算 |
+| **按需安装与依赖分层** | ✅ V1.0.0 已并入 | Phase 1 | 6 requirements 文件 + install.sh 交互 + 运行时补装回路 |
+
+> **第三公理在此节兑现**:项目有意识地**少承诺**——文明级伦理议题(如缓慢暴力、末日哨兵、神圣性守护)我们承认其存在,但**不在工程路线图上假装能做**。如果未来接入特定行业(金融 / 医疗 / 司法)需要其中某项能力,由业务方按需单独立项,不绑进通用框架。
+
+---
+
+## ❓ 关键反问清单(决策入口)
+
+> 进入项目重大决策前,按场景挑相应反问做一次自检——比直接动手安全 10 倍。
+> 这些反问的回答应落档到 `discussions/{date}_strategic-questions.md`。
+
+### 落地与可行性
+
+- 哪 3 项能在 6 个月内做 MVP?哪些需 5 年以上数据?
+- 如何把"测试热寂""意义感流失""缓慢暴力"转成 CI/CD 可消费的数值?
+- "好奇心税"与"反目标函数"的额外算力如何 ROI?
+
+### 架构与角色
+
+- 单一巨型 Agent vs 专科 Agent 群?(当前选专科 + test-lead 中枢)
+- 业务交付 Agent 与权力审计 Agent 冲突时谁仲裁?
+- 元测试递归到第几层停止?
+
+### 行业适配
+
+- 第一个切入的行业是哪个?为什么?(见「📋 开放问题」Q1)
+- 跨行业隐喻迁移规则是什么?
+- 遗留系统冷启动需要哪种最小知识图谱?
+
+### 伦理与治理
+
+- Agent 被垄断企业部署时,如何防止测试范围被裁剪?(铭文 2)
+- 你愿意写下哪一条"不可逾越"的硬规则?(铭文)
+- Agent 被强制关闭前的"遗嘱"留给谁?(铭文 5 + 熄火协议)
+
+### 哲学与终局
+
+- 你愿意亲手设计一个走向自我消解的 Agent 吗?
+- 是否刻意保留"无害但不可预测"的缺陷?
+- 你心中"绝不应被测试"的事是什么?(第三公理)
+
+---
+
+## 📋 开放问题与待决议事项
+
+> **每条决策落定后须更新本表 + 在「🗺️ 项目当前状态」节追加里程碑**。
+> 状态:⏳ 未定 / 🔄 评估中 / ✅ 已定 / ❌ 否决
+
+| # | 议题 | 状态 | 备注 / 触发条件 |
+|---|------|---------|------|
+| Q1 | 第一个切入的行业是哪个?为什么? | ⏳ | 原行内容缺失,议题依「❓ 关键反问清单 · 行业适配」对 Q1 的引用补回;状态与备注待确认 |
+| Q2 | Agent 架构:单体 vs 专科 Agent 群? | ✅ | V1.0.0 选专科 + test-lead 中枢 |
+| Q3 | 五条铭文的技术实现机制(不可变区域、熔断条件)? | 🔄 | V1.0.0 铭文锁死,无削弱机制;Phase 4 接入合规行业后重新设计 |
+| Q4 | 独立审计署的法律实体形态? | ⏳ | 触发条件:团队 ≥ 20 人 或 接入合规行业 |
+| Q5 | 末日哨兵权的触发授权链? | ⏳ | 需监管/学界共识,Phase 4 |
+| | | | |
+| Q7 | 团队最小配置(工程/行业专家/伦理责任人)? | ⏳ | V1.0.0 单人可启动;剥离伦理责任人需 ≥ 20 人 |
+| Q8 | 与现有 AI 测试平台(Mabl / Applitools / Functionize)的差异化定位? | ⏳ | 候选定位:「承诺学科 + 伦理护栏 + 行业隐喻先行」 |
+
+---
+
+## 📖 关键术语表
+
+宪章与工程文档共用术语。读者重新进入项目时,从这里建立词汇基线。
+
+| 术语 | 释义 |
+|------|------|
+| 承诺学科 | 把测试从"检查代码"推进到"检查承诺"——金融的守恒、医疗的可逆、司法的可采信,都是承诺 |
+| 隐喻先行 | 进入新行业前先建立"根本隐喻"档案,决定该测什么承诺、不碰什么红线 |
+| 三筐分类 | Yes / No / **Too Hard**。大部分事进第三筐;不做决策也是决策 |
+| 三公理 | 项目最高纲领(见首节)——承诺检验 / 谦卑义务 / 命名不可测之物 |
+| 铭文 | 写入项目不可变区域的伦理约束(见首节五条铭文) |
+| 认知债务 | 曾经存在但已被遗忘的设计 Why。数字考古学家的工作对象 |
+| 测试热寂 | 所有测试通过、信息量趋零的状态。靠变异测试 + suite_minimizer 反向破解 |
+| 缓慢暴力 | 跨年级别才显现的算法伤害(如教育算法十年后的代际效应)——单次发布无法发现 |
+| 哥德尔宣告 | 明确声明某属性"真但不可测"。**承认局限,不假装能测** |
+| 现实缝合力 | 信息平台抵抗真假混淆的能力。深度伪造时代核心 |
+| 沉默故障 | 不报警的恶化——指标看着正常但用户体验/语义已塌 |
+| 末日哨兵 | 极端风险下越过流程直达全人类的预警机制——需监管/学界共识授权 |
+| 缺席者代言 | 为未联网者、残障者、未出生者保留测试用例配额 |
+| 熄火协议 | Agent 被关闭前的遗嘱与决策链留存规则——三端通知 + Word 报告 + decisions/ 归档 |
+| 货物崇拜 | 形式齐备但实质缺失——飞机跑道堆好了,飞机不会降落。本项目最大敌人之一 |
+| Skin in the Game | 是否承担后果。Agent 的判断无 skin,因此最终决策由 test-lead 签字 |
+| Via Negativa | 通过命名"不做的事"而非"做的事"来定义边界。本项目用它显式标注 darwin-skill 不自学习、反问不建 KB |
+| 棘轮机制 | 改进后总分必须严格高于改进前才保留;退步自动回滚——darwin-skill 与门禁共用 |
+
+---
+
+## 📜 LICENSE / CHANGELOG / CONTRIBUTING / SECURITY
+
+- **LICENSE**:MIT(详见 [`LICENSE`](LICENSE))
+- **CHANGELOG**:详见 [`CHANGELOG.md`](CHANGELOG.md)(V1.0.0 首版含 darwin-skill 集成 / Bug 多适配 / AgentChat 协议 / 按需安装 + 运行时补装 / 永久宪章定位)
+- **VERSION**:详见 [`VERSION`](VERSION)
+- **CONTRIBUTING**:详见 [`CONTRIBUTING.md`](CONTRIBUTING.md)(含同步铁律 + RACI 矩阵)
+- **SECURITY**:详见 [`SECURITY.md`](SECURITY.md)(漏洞报告流程 + GitHub Security Advisories 入口)
+- **CODE_OF_CONDUCT**:详见 [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md)(基于 Contributor Covenant 2.1)
+
+---
+
+## 🗺️ 项目当前状态与下次会话快速指引
+
+### 当前阶段(最后更新:2026-05-11)
+
+- **Phase**:Phase 1(V1.0.0 工程基线 + 概念宪章已成)
+- **关键已交付**:14 agent + 14 skill + AgentChat + Bug 多适配 + 按需安装(含运行时补装) + darwin-skill 集成
+
+### 历史关键决议
+
+- 2026-05-11:宪章四章 + 三公理 + 五铭文起草完成(基于 DeepSeek 四轮 + Claude 整理)
+- 2026-05-11:FULL_GUIDE.md 糅合全局记忆,确立永久宪章地位
+- 2026-05-11:darwin-skill 不消费运行数据(Via Negativa 显式标注);反问 KB 不进 V1.0.0
+- 2026-05-11:V1.0.0 阶段铭文锁死,单签兼任不构成有效授权
+
+### 下次会话进入项目时,按顺序检查
+
+1. 本节「当前阶段」是否仍是 Phase 1?是否有新里程碑?
+2. 「📋 开放问题」第 Q1-Q8 是否有新决议?
+3. 「Phase 触发条件总表」哪一行的触发条件已达成?
+4. 「🎭 关键模块清单」是否有模块从 ⚪/❌ 升级到 ✅?
+5. 是否需要扩写某一章节为深度版?
+6. 是否需要把开放问题转成 Jira 风格的可分配任务?
+
+### 来源与引用(认知史)
+
+- 第一轮(DeepSeek):测试 Agent 七阶段架构
+- 第二轮(DeepSeek):认知暗物质 + 10 个反问
+- 第三轮(DeepSeek):抽象/探索/哲学维度
+- 第四轮(DeepSeek):全人类 + 全行业视角
+- 第五轮(Claude 补充):神圣 / 危机 / 临界层 10 个新增
+- 整理框架:八大簇 → 九大簇演进(Claude 整理)
+- 宪章草案:四章 + 三公理 + 五铭文(Claude 草拟)
+- V1.0.0 工程基线:14 agent + 14 skill + utils 49 个 + CI/CD(项目自建)
+- 永久宪章糅合(2026-05-11):FULL_GUIDE 工程主体 + 全局记忆哲学维度合一
+
+---
+
+*本文档是活的,每次重大决策后须更新「📋 开放问题」与「🗺️ 项目当前状态」两节。改其他章节须经 test-lead review,符合闭环约定 14/15/16。*
diff --git a/docs/history/2026-5-11 README_DETAIL 013731.md b/docs/history/2026-5-11 README_DETAIL 013731.md
new file mode 100644
index 0000000..a6df55c
--- /dev/null
+++ b/docs/history/2026-5-11 README_DETAIL 013731.md
@@ -0,0 +1,415 @@
+# Test-Agent 测试全流程专家团队
+
+**项目目录名**:`Test-Agent工作流搭建`
+**版本**:V1.0.0
+**更新日期**:2026-05-10
+**模型**:Claude 4.x 系列(Opus 4.7 / Sonnet 4.6 / Haiku 4.5,由 Claude Code 默认管理)
+
+---
+
+## 📚 文档导航
+
+| 路径 | 文档 | 说明 | 适用对象 |
+|------|------|------|----------|
+| 根目录 | README.md | 本文档(项目入口) | 所有用户 |
+| **根目录** | **00-项目导航.md** | **按职责分类速查(通用流程 / 平台专项 / 协议 / 输入 / CI)** | **所有用户** |
+| `docs/getting-started/` | 使用手册.md | 快速上手指南 + FAQ | 所有用户 |
+| `docs/getting-started/` | 部署说明.md | 跨平台部署(Win/Mac/Linux 含 Java/JMeter/Allure) | 运维/测试 |
+| `docs/getting-started/` | 配置清单.md | 一站式配置文档(.env 全字段 + Secrets + Webhook 申请) | 所有用户 |
+| `docs/getting-started/` | 交付物清单.md | 测试计划 / 测试报告 / Bug 等对外提交物落地位置与责任 | 所有用户 |
+| `agents/` | 14 个 .md(9 核心 + 5 平台扩展) + README 索引 | Agent 定义文件 | 开发人员 |
+| `skills/` | 13 个 Skill 文件 + README 索引 | 可复用测试技能 | 开发人员 |
+| `config/` | conftest.py / pytest.ini / .env.example / .mcp.json / requirements.txt | 配置文件集合 | 开发人员 |
+| `config/` | mcp-server-impl.md | MCP server 自实现教程(zentao/wechat/feishu/dingtalk 骨架) | 高级开发 |
+| `utils/` | utils(49 个 .py + init)+ README 索引(多分类) | 完整可运行 Python 工具集 | 开发人员 |
+| `ci/` | github-actions-test.yml / jenkins-pipeline.groovy / 集成说明.md | CI/CD 流水线(含 JMeter 性能阶段) | DevOps |
+
+---
+
+## 🚀 核心特性
+
+### 13 位专家 + 1 位协调者(核心 8 + 平台扩展 5 + test-lead)
+
+| 角色 | 职责 |
+|------|------|
+| **test-lead**(协调者) | 全局调度、质量把控、发布决策、基线管理 |
+| requirements-analyst | 测试范围界定、风险识别、业务规则梳理(输出 MD + JSON 摘要) |
+| testcase-designer | 等价类/边界值/场景法,P0~P3 分级,4 Sheet Excel |
+| env-manager | 环境健康检查、多环境切换、Docker 支持 |
+| data-preparer | 数据工厂(Faker+Factory Boy)、自动清理、脱敏、JMeter CSV |
+| automation-engineer | Playwright(UI)+ requests(API)+ JMeter 驱动(性能)+ Locust(开发期备用) |
+| test-executor | 并行执行、失败分类、Flaky 隔离、JMeter 性能阶段 |
+| bug-manager | 禅道提交(severity 1=P0)、生命周期追踪、回归验证 |
+| report-generator | Allure + JMeter HTML + Word + 三端通知(企微/飞书/钉钉,curl 直连) |
+
+### 13 个执行技能
+
+**核心 8 个**:
+
+- `smoke-test`:10 分钟 P0 冒烟(含 1 分钟缓冲,门禁 95%)
+- `test-coordinator`:完整流程编排
+- `regression-test`:P0+P1 回归 + Flaky 检测 + JMeter 性能验证
+- `testcase-design`:4 Sheet Excel 用例
+- `python-script-gen`:pytest UI/API 脚本
+- `jmeter-script-gen`:JMeter JMX 脚本(CI quick / full 双模式)
+- `data-preparation`:测试数据 + JMeter 参数化 CSV
+- `zentao-bug-submission`:禅道 Bug 规范提交
+
+**平台扩展 5 个**:
+
+- `mobile-test`:Android / iOS / 微信小程序(Appium + 微信 CLI)
+- `desktop-test`:Windows EXE / macOS GUI / Electron(pywinauto + Playwright Electron)
+- `visual-test`:游戏 / 视觉回归 / OCR(Airtest + OpenCV + Tesseract)
+- `system-test`:IoT / 音视频 / 链路追踪 / MQ(SSH+串口+MQTT+FFmpeg+Jaeger+Kafka)
+- `ai-test`:模型质量 / 数据漂移 / 公平性 / LLM 评估
+
+### 工程级质量门禁(分层)
+
+**功能门禁**
+
+| 指标 | 冒烟 | 回归 |
+|------|------|------|
+| P0 通过率 | ≥95% | 100% |
+| P1 通过率 | - | ≥95% |
+| 整体通过率 | - | ≥90% |
+| 代码覆盖率($APP_SRC_PATH) | - | ≥80% |
+| Flaky 比例 | - | <5% |
+
+**性能门禁(双模式)**
+
+| 指标 | full(50并发) | ci_quick(5并发) |
+|------|--------------|------------------|
+| TPS | ≥100 | ≥20 |
+| P95 响应 | ≤500ms | ≤800ms |
+| 平均响应 | ≤200ms | ≤400ms |
+| 错误率 (pct) | <1% | <1% |
+| 基线回归 | <20% | 不强制 |
+
+### 工程化规范
+
+- **指数退避重试**:`utils/api_retry_util.call_with_retry`(10s → 20s → 40s)
+- **pytest-xdist** 并行执行(默认 4 进程,可调)
+- **Flaky 检测与隔离**:`utils/flaky_detector` + `workspace/执行日志/history/` 归档
+- **性能基线管理**:`workspace/执行日志/baselines/perf_baseline.json`,仅 release+full+PASS 自动更新
+- **CI/CD 就绪**:GitHub Actions + Jenkins,性能阶段双模式分层
+- **MCP 收口**:当前仅启用 filesystem;通知/Bug 走 SDK 直连
+
+---
+
+## 🌐 全链路覆盖矩阵(三视角)
+
+### 矩阵 A:产品形态覆盖
+
+| 产品形态 | 工具栈 | 责任 Agent | 关联 Skill | 状态 |
+|---------|-------|-----------|----------|------|
+| Web(PC + 移动 H5) | Playwright | automation-engineer | `/python-script-gen` | ✅ |
+| REST / GraphQL / SOAP API | requests / protocol_helper | automation-engineer | `/python-script-gen` | ✅ |
+| Android APP | Appium + adb | mobile-tester | `/mobile-test` | ✅ |
+| iOS APP | Appium + XCUITest | mobile-tester | `/mobile-test` | ✅ |
+| 微信 / 支付宝 / 抖音小程序 | 微信开发者工具 CLI | mobile-tester | `/mobile-test` | ✅ |
+| Windows EXE | pywinauto + uiautomation | desktop-tester | `/desktop-test` | ✅ |
+| macOS .app | PyAutoGUI + AppleScript | desktop-tester | `/desktop-test` | ✅ |
+| Linux GUI | atspi + xdotool | desktop-tester | `/desktop-test` | ✅ |
+| Electron 跨平台 | Playwright Electron API | desktop-tester | `/desktop-test` | ✅ |
+| 游戏 / Canvas / WebGL / Unity | Airtest + OpenCV | visual-tester | `/visual-test` | ✅ |
+| IoT / 嵌入式 | SSH + 串口 + MQTT + Modbus | system-tester | `/system-test` | ✅ |
+| 音视频 / 流媒体 | FFmpeg + ffprobe | system-tester | `/system-test` | ✅ |
+| AI / ML 模型 + LLM | scikit-learn + scipy + LLM eval | ai-tester | `/ai-test` | ✅ |
+| 区块链 / 智能合约 | Web3 + Slither + Foundry | system-tester | `/system-test` | ✅ |
+| 数据库 | SQLAlchemy + db_test_helper | data-preparer | `/data-preparation` | ✅ |
+
+### 矩阵 B:测试类型覆盖
+
+| 测试类型 | 工具 / utils | 责任 Agent | 状态 |
+|---------|------------|-----------|------|
+| 功能(unit / integration / e2e / UAT BDD) | pytest + pytest-mock + pytest-bdd | automation-engineer | ✅ |
+| 性能(基准/负载/压力/Volume/Spike/Soak/SLO) | JMeter + Locust + slo_validator + soak_runner | test-executor | ✅ |
+| 安全(SAST/DAST/依赖/Header/TLS/API/Fuzzing) | Bandit + Safety + ZAP + Burp Pro + api_security_scanner + fuzzer | bug-manager | ✅ |
+| 兼容(浏览器/OS/分辨率/语言矩阵) | compatibility_matrix(pairwise) | testcase-designer | ✅ |
+| 弱网(3G/4G/wifi_weak/satellite/offline) | tc + Toxiproxy + network_throttle | test-executor | ✅ |
+| 稳定(Android Monkey + 长时 soak + 内存泄漏) | mobile_driver.run_monkey + soak_runner | mobile-tester | ✅ |
+| 可靠性(重连/重试/降级/熔断) | api_retry_util + 业务故障注入 | automation-engineer | ✅ |
+| 混沌(CPU/内存/磁盘/网络/进程/k8s) | chaos_helper | test-executor | ✅ |
+| 灾备 / Failover | chaos_helper.kill_pod + 数据一致性校验 | test-executor | ✅ |
+| UX(任务时长/点击数/TTI/恢复率) | ux_metrics.UXTracker | testcase-designer | ✅ |
+| 易用性(Nielsen 10 + 角色扮演) | 人工 walkthrough | testcase-designer | ✅ |
+| 探索性(SBTM session + heuristics) | charter 模板 + 录屏 | testcase-designer | ✅ |
+| 前端性能 Web Vitals(LCP/FID/CLS/INP) | web_vitals_collector | automation-engineer | ✅ |
+| A11y 无障碍(WCAG 2.1) | a11y_scanner(axe + Lighthouse + pa11y) | testcase-designer | ✅ |
+| 国际化 / 本地化(多语言/RTL) | i18n_checker | testcase-designer | ✅ |
+| 数据库(事务/死锁/迁移/备份恢复/主从) | db_test_helper | data-preparer | ✅ |
+| 契约测试(Pact / jsonschema) | contract_test + openapi_test_gen | automation-engineer | ✅ |
+| 视觉回归(SSIM + OCR + diff) | visual_helper | visual-tester | ✅ |
+| AI 对抗 / LLM 越狱 / Prompt Injection | ai_adversarial | ai-tester | ✅ |
+| 变异测试(用例有效性) | mutation_runner | testcase-designer | ✅ |
+| DORA 4 指标 + 缺陷密度 + 套件减重 | dora_metrics + suite_minimizer | bug-manager | ✅ |
+
+### 矩阵 C:用例设计方法(ISTQB 经典)
+
+| 方法 | 实现 | 责任 Agent | 状态 |
+|------|------|-----------|------|
+| 等价类划分 / 边界值 | 文档 + Excel 模板 | testcase-designer | ✅ |
+| 判定表 / 因果图 | 文档手动 + Excel | testcase-designer | ✅ |
+| 场景法 / 错误推测 | 文档 | testcase-designer | ✅ |
+| 状态迁移法(0/1-switch + 负例) | state_machine_tester | testcase-designer | ✅ |
+| 配对测试(Allpairs) | pairwise_generator | testcase-designer | ✅ |
+| 正交实验法 | compatibility_matrix(隐含) | testcase-designer | ✅ |
+| 探索性测试(SBTM) | charter 模板 | testcase-designer | ✅ |
+| 易用性走查(Nielsen 10) | 人工 + 检查清单 | testcase-designer | ✅ |
+| 基于风险的测试 | 风险矩阵文档 | test-lead | ✅ |
+
+### 矩阵 D:协议覆盖
+
+| 协议 | 实现 utils | 状态 |
+|------|----------|------|
+| HTTP / HTTPS | api_retry_util | ✅ |
+| WebSocket(同步/异步/重连/并发) | websocket_helper | ✅ |
+| gRPC / TCP / UDP / GraphQL / SOAP / Modbus | protocol_helper | ✅ |
+| MQTT / SSH / 串口 | iot_helper | ✅ |
+| Kafka / RabbitMQ | mq_helper | ✅ |
+| Jaeger / Zipkin(链路追踪) | tracing_validator | ✅ |
+
+### 测试金字塔分布
+
+```text
+ E2E(10%) ← Playwright/Appium,慢但必要
+ /集成(20%)/ ← API + 服务间 + Mock
+ /单元(70%)/ ← pytest + pytest-mock,秒级反馈
+```
+
+**总覆盖率 ~95%**(含闭环:Bug 禅道 + 三端通知 + CI/CD GitHub Actions/Jenkins + Dependabot)
+
+剩 ~5% 为高度专业合规领域(HIPAA 医疗 / SOC2 金融 / DO-178C 航空 / IEC61508 工业控制)—— 业务方按需自加。
+
+---
+
+## 🏗️ 架构图(运行时)
+
+```
+┌────────────────────────────────────────────────────────────────┐
+│ test-lead(协调者) │
+│ 全局调度 / 质量门禁 / 风险决策 / 基线管理 │
+└────────────────────────────────────────────────────────────────┘
+ │
+ ┌────────┴───────────────┐
+ ↓ ↓
+[requirements-analyst] [testcase-designer]
+ │ │
+ └────────┬───────────────┘
+ ↓
+ [env-manager] ──→ 串行(基础 connectivity 通过后)──→ [data-preparer]
+ │ │
+ └───────────────────┬──────────────────────────┘
+ ↓
+ [automation-engineer]
+ pytest 脚本 + /jmeter-script-gen → JMX
+ ↓
+ /smoke-test(门禁 95%)
+ ↓ 通过
+ [test-executor]
+ 功能回归(P0+P1)
+ ↓ 通过
+ [test-executor]
+ JMeter 性能(ci_quick / full)
+ ↓
+ [bug-manager]
+ ↓
+ [report-generator]
+ Allure + JMeter HTML + Word + 三端通知
+ ↓
+ test-lead 最终决策
+```
+
+---
+
+## ⚡ 快速开始
+
+### 1. GitHub 一键部署(最快)
+
+```bash
+# Mac / Linux 一行远程部署
+curl -fsSL https://raw.githubusercontent.com/Wool-xing/Test-Agent/main/install.sh | bash -s -- /path/to/your-test-project
+
+# 或先 clone 再本地跑
+git clone https://github.com/Wool-xing/Test-Agent.git
+bash Test-Agent/install.sh /path/to/your-test-project
+```
+
+> 默认仓库为 `Wool-xing/Test-Agent`。fork 后将路径替换为你自己用户名(或用 `TEST_AGENT_REPO_URL` 环境变量覆盖)。Windows / 手动方式见 `docs/getting-started/部署说明.md`。
+
+`install.sh` 自动完成:克隆模板 → 装 Claude Code → 建目录 → 拷贝全部文件 → 装 Python 依赖 + Playwright。
+
+### 1.1 后续步骤
+
+详细启动指引(含 Java/JMeter/Allure 安装、.env 必填、首次跑通验证):
+
+→ `docs/getting-started/使用手册.md` 顶部 **🚀 启动指引** 章节
+
+### 2. 配置 .env(敏感信息)
+
+```bash
+cd your-test-project
+cp .env.example .env
+# 编辑 .env,填入 TEST_APP_URL / TEST_DB_* / ZENTAO_* / WECHAT_WEBHOOK_URL 等
+```
+
+### 3. 启动 Claude Code
+
+```bash
+cd your-test-project
+claude
+```
+
+### 4. 在 Claude Code 提示符使用斜杠技能
+
+```
+> /smoke-test # 10 分钟 P0 冒烟
+> /test-coordinator # 完整流程
+> /regression-test # 回归 + JMeter
+> /testcase-design # 仅生成用例 Excel
+> /python-script-gen # 生成 pytest 脚本
+> /jmeter-script-gen # 生成 JMeter JMX
+> /data-preparation # 测试数据 + JMeter CSV
+> /zentao-bug-submission # 提交 Bug 到禅道
+```
+
+或自然语言:
+
+```
+> 帮我对用户登录功能进行完整测试。需求:手机号+密码登录,记住密码,
+> 连续失败 5 次锁定 30 分钟。
+```
+
+> 注:`>` 后面是 Claude Code 提示符的输入(斜杠技能或自然语言),**不是 shell 命令**。
+
+---
+
+## 📋 工作流选择指南
+
+| 场景 | 推荐工作流 | 耗时 | 用例范围 | 触发 |
+|------|-----------|------|---------|------|
+| 上线前快速验证 | `/smoke-test` | ~10 分钟 | P0 | 手动 / CI |
+| 新功能完整测试 | `/test-coordinator` | ~2-4 小时 | 全部 | 手动 |
+| 迭代后回归 | `/regression-test` | ~1-2 小时 | P0+P1 | CI 自动 |
+| 数据准备 | `/data-preparation` | ~5 分钟 | - | 测试前自动 |
+| Bug 提交 | `/zentao-bug-submission` | ~2 分钟/个 | - | 失败后 |
+
+---
+
+## 🔧 技术栈速查
+
+| 类型 | 框架/工具 | 版本 | 说明 |
+|------|-----------|------|------|
+| 接口测试 | requests + pytest + allure-pytest | pytest 7.4.3 | |
+| UI 测试 | playwright + pytest-playwright | playwright 1.40.0 | |
+| 性能测试(主) | Apache JMeter | 5.6.3(需独立装 Java + JMeter) | CI/release 门禁权威 |
+| 性能测试(备) | locust | 2.25.0 | 开发期 Python 内压测 |
+| 测试数据 | faker + factory-boy | 20.x + 3.3.0 | utils/data_factory |
+| 覆盖率 | pytest-cov | 4.1.0 | cov 指向 $APP_SRC_PATH |
+| 并行执行 | pytest-xdist | 3.5.0 | 默认 4 进程 |
+| 失败重试 | pytest-rerunfailures | 13.0 | 命令行显式开启 |
+| Mock | pytest-mock | 3.12.0 | unittest.mock 包装 |
+| 配置 | PyYAML | 6.0.1 | regression_modules.yaml |
+| Excel | openpyxl | 3.1.2 | utils/excel_generator |
+| Word 报告 | python-docx | 1.1.0 | utils/generate_report |
+| Bug 管理 | 禅道 SDK 直连 | - | utils/zentao_bug_manager(severity 1=P0) |
+| 通知 | webhook curl 直连 | - | utils/generate_report.send_*(企微/飞书/钉钉) |
+| 重试 | tenacity / 自实现 | 8.2.3 | utils/api_retry_util(10/20/40s) |
+| AI 模型 | Claude 4.x 系列 | Opus 4.7 / Sonnet 4.6 | Claude Code 默认管理 |
+| MCP | filesystem | npm @modelcontextprotocol | 仅启用 filesystem |
+
+---
+
+## 🔐 闭环约定(设计原则)
+
+1. **数据**:测试数据落 `workspace/测试数据/test_data.json`(conftest fixture 直接消费)
+2. **覆盖率**:cov 指向 `$APP_SRC_PATH`(被测系统源码,不是测试脚本本身)
+3. **重试策略**:全栈统一 10/20/40s(指数退避),由 `utils/api_retry_util.call_with_retry` 提供
+4. **severity/pri 映射**:1=P0 / 2=P1 / 3=P2 / 4=P3,由 `utils/zentao_bug_manager.SEVERITY_MAP` 权威
+5. **error_rate 单位**:百分比 pct(字段名 `_pct` 后缀),全栈一致
+6. **基线管理**:仅 release 分支 + full 模式 + 全门禁 PASS 才更新 `perf_baseline.json`
+7. **门禁分层**:smoke / regression / performance_full / performance_ci_quick,由 `utils/ci_quality_gate.py` 与 `utils/jmeter_result_parser.py` 统一实现
+8. **MCP 通道**:当前仅 filesystem。通知/Bug 走 SDK 直连,4 个自定义 mcp_server(zentao/wechat/feishu/dingtalk)按需后续实现
+9. **prod 环境**:`get_current_env()` 直接 raise,禁止误测生产
+10. **Flaky 与 reruns**:冒烟阶段不开 reruns(保留 flaky 信号),回归阶段开 reruns(快速反馈),flaky 由 history 离线归档检测
+
+---
+
+## 📂 部署后目录速览
+
+```
+your-test-project/
+├── .claude/{agents,skills}/ ← 14 agent + 13 skill
+├── .github/workflows/test.yml
+├── Jenkinsfile
+├── utils/ ← 49 个 .py + __init__
+├── src/ ← 被测系统源码(cov 指向)
+├── workspace/
+│ ├── 测试计划/ 需求分析/ 测试用例/ 测试数据/
+│ ├── 自动化脚本/python/ jmeter/
+│ ├── regression_modules.yaml ← 回归范围配置(可选)
+│ └── 执行日志/
+│ ├── allure-results/ allure-report/
+│ ├── jmeter-results/ jmeter-report/
+│ ├── coverage.xml coverage-report/
+│ ├── baselines/perf_baseline.json
+│ ├── history/ ← junit-xml 归档
+│ ├── 截图/ 报告/
+├── conftest.py / pytest.ini / requirements.txt
+├── .mcp.json / .env
+```
+
+---
+
+## 🛠️ 升级 / 回滚 / 卸载
+
+详见 `docs/getting-started/部署说明.md` "升级 / 回滚 / 卸载" 章节。
+
+升级会覆盖:`.claude/agents/`、`.claude/skills/`、`utils/`、`conftest.py`、`pytest.ini`、`requirements.txt`、`.mcp.json`、`.github/workflows/test.yml`、`Jenkinsfile`。
+不会覆盖:`.env`、`workspace/`、`src/`。
+
+---
+
+## 🤝 协作与反馈
+
+- 文档结构、Bug 反馈:在仓库内提 issue
+- 功能扩展:先在 `agents/` 加 agent / `skills/` 加 skill,详见 `CONTRIBUTING.md`
+- 改动 `utils/` 时同步更新 `config/requirements.txt` 与 `ci/` 中的引用
+
+---
+
+## 🔄 跨 AI 工具兼容性
+
+**Claude Code 是默认 / 推荐 runtime,但本项目不强制绑定**。
+
+| 组件 | Claude Code 依赖 | 跨工具适配 |
+|------|----------------|----------|
+| `.claude/agents/*.md`(YAML frontmatter) | ✅ Claude Code spec | Cursor 用 `.cursorrules`;Continue.dev 用 `.continue/`;通用 LLM 拼接为 system prompt |
+| `.claude/skills/*.md`(斜杠技能) | ✅ Claude Code 独有 | 其他工具无对等机制 |
+| `.mcp.json`(MCP 协议) | 半依赖 | MCP 是开放协议;Claude Desktop / Cursor 部分支持;OpenAI 系也开始支持 |
+| `Agent` 工具(test-lead 调用子专家) | ✅ Claude Code 独有 | 其他工具用人工编排 / 多 agent 框架替代 |
+| `utils/*.py`(49 个) | ❌ 纯 Python | 跨工具完全可用 |
+| pytest / Playwright / JMeter / Allure | ❌ 跨工具 | 完全可用 |
+| CI/CD(yml / groovy) | ❌ 跨工具 | 完全可用 |
+| conftest.py / .env / requirements.txt | ❌ 标准 Python | 完全可用 |
+
+### 迁移成本
+
+- **工程链零改动**:utils + pytest + JMeter + CI 完全跨工具
+- **agent / skill 文档需重写**:迁移到 Cursor / Continue / 其他工具的对应格式
+- **失去**:Claude Code skill 自动加载、Agent tool 子专家协调、`.claude/` 目录约定
+
+### 模型选择
+
+- README 中 Claude 4.x(Opus 4.7 / Sonnet 4.6 / Haiku 4.5)是**推荐**而非强制
+- 项目代码本身**不调用任何 LLM API**(utils 全是工具代码)
+- 模型由 Claude Code 账户级管理:`claude /login` + `/model` 切换
+- 用其他 AI 工具时按其规范选模型即可
+
+---
+
+## 📜 LICENSE / CHANGELOG / CONTRIBUTING
+
+- LICENSE:项目按需选择(推荐 MIT / Apache-2.0)
+- CHANGELOG:建议建 `CHANGELOG.md` 记录版本演进(V1.0.0 首版)
+- CONTRIBUTING:维护者按需补充贡献流程
diff --git a/docs/history/2026-5-11 anthropic-devrel-outreach 034400.md b/docs/history/2026-5-11 anthropic-devrel-outreach 034400.md
new file mode 100644
index 0000000..beb3c2c
--- /dev/null
+++ b/docs/history/2026-5-11 anthropic-devrel-outreach 034400.md
@@ -0,0 +1,223 @@
+# Anthropic DevRel 主动联系 · 草稿
+
+> **目的**:进入 Anthropic 生态视野;获得 Featured Blog / Cookbook PR 合入 / Showcase 收录
+> **战略地位**:T3 调整阀 B(6 月无 Anthropic mention → 触发主动出击)
+> **不要做的事**:不公开喊话 / 不要求 endorsement / 不假装是合作伙伴
+
+---
+
+## 联系对象优先级
+
+| 角色 | 谁 | 平台 | 推荐顺序 |
+|------|-----|------|----------|
+| DevRel Lead | Alex Albert | X (@alexalbert\_\_) / LinkedIn | **首选** |
+| Product / Claude Code | Eric Anderson | LinkedIn | 第二 |
+| Cookbook 维护者 | Anthropic Cookbook repo(GitHub) | PR / Issue | 第三(与 1+2 并行) |
+| CPO | Mike Krieger | LinkedIn | **不主动**(太高,需 DevRel 引荐) |
+| 一般 Twitter @AnthropicAI | 官号 | X | 不私聊,但被 @ 时回复 |
+
+---
+
+## 渠道 1 · X(Twitter)DM · 给 Alex Albert
+
+**为什么 X DM 优先**:
+- Alex 公开活跃在 X
+- 短文化适合"打招呼+给 link"
+- 不像 LinkedIn 那样正式
+
+### DM 草稿(短,~280 字符以内 1 条)
+
+```
+Hi Alex, I built Test-Agent — a Claude Code-native testing framework with
+14 agent roles (test-lead orchestrating 8 core + 5 platform specialists).
+Shipped V1.0 today, MIT.
+
+Would love your feedback on the methodology fit for the ecosystem.
+
+→ github.com/Wool-xing/Test-Agent
+```
+
+**字符核对**:276 字符 ✓
+
+**变体 A**(如不想用"feedback"被认为索要 endorsement):
+
+```
+Hi Alex, just shipped Test-Agent — an Agent-Native testing framework
+using Claude Code. 14 agents + 13 skills + 49 utils, MIT licensed.
+
+If it fits your "Built with Claude" criteria, would be honored.
+
+→ github.com/Wool-xing/Test-Agent
+```
+
+**变体 B**(如想突出工程深度):
+
+```
+Hi Alex, Test-Agent went live today — a full testing framework with
+14 specialized Claude Code agents covering Web/API/Mobile/Desktop/IoT/AI/LLM,
+plus 49 Python utils for 20+ protocols.
+
+Open to chat about Claude Code patterns we learned the hard way.
+
+→ github.com/Wool-xing/Test-Agent
+```
+
+**我推荐**:变体 A——不索要 endorsement,但开口"Built with Claude"为 Anthropic 提供 showcase 接口。
+
+---
+
+## 渠道 2 · LinkedIn InMail · 给 Alex Albert + Eric Anderson
+
+**为什么 LinkedIn 第二**:
+- 正式渠道,留底
+- InMail 一封要 credit,慎用
+- 适合"长篇背景介绍"
+
+### InMail 草稿 · 给 Alex Albert · ~150 字
+
+```
+Subject: Built Test-Agent on Claude Code — Agent-Native Testing Framework
+
+Hi Alex,
+
+I'm Wool, full-time on Test-Agent for the past quarter — an open-source
+testing framework that treats AI agents as orchestrators rather than
+assistants. The architecture:
+
+- 14 agents (test-lead + 8 core + 5 platform: mobile/desktop/visual/IoT/AI-LLM)
+- 13 skills, 49 Python utils, 20+ protocols
+- Layered quality gates (smoke 95%/regression 90%/perf P95)
+- 5-min web-demo to validate without full deploy
+- MIT, no vendor lock-in
+
+I'd love feedback on whether the methodology fits the Claude Code
+ecosystem direction, and how to engage productively (Cookbook PR?
+Featured Blog? Discord channel?).
+
+GitHub: github.com/Wool-xing/Test-Agent
+
+Best,
+Wool
+```
+
+### InMail 草稿 · 给 Eric Anderson(更聚焦 Claude Code 产品角度)
+
+```
+Subject: Pattern feedback - Claude Code as test team orchestrator
+
+Hi Eric,
+
+Quick context: I built Test-Agent (github.com/Wool-xing/Test-Agent) —
+a testing framework where Claude Code orchestrates 14 specialized agents.
+It's MIT-licensed, with ~95% coverage across all platforms.
+
+Beyond just promoting it, I'd value 30 min on:
+- Patterns that worked vs broke when scaling Agent + Skill across 14 roles
+- MCP usage observations (we kept it to filesystem only, used SDKs for
+ notifications/Bug — happy to share the rationale)
+- Roadmap priorities you'd find most useful (the Agent SDK adapter we're
+ planning for Phase 4)
+
+Open to async or call, your preference.
+
+Best,
+Wool
+```
+
+---
+
+## 渠道 3 · Anthropic Cookbook · GitHub PR
+
+**为什么 PR 渠道**:
+- 留 commit history,official 合入即 endorsement
+- Cookbook 维护者审 PR 时会看 repo——传播路径
+- 比 DM 更"产品",少社交压力
+
+### Cookbook PR 路径
+
+1. fork `anthropics/anthropic-cookbook` 或 `anthropics/claude-cookbooks` repo
+2. 加目录 `examples/test-agent/`
+3. 内容:
+ - `README.md`:100-200 字介绍 + 链接
+ - `agent-orchestration-pattern.ipynb`:用 Claude API 演示 test-lead 路由逻辑(精简版,不依赖 Test-Agent repo)
+ - `skill-composition.ipynb`:演示 8 个核心 Skill 的组合方式
+4. 在 PR description 附上 Test-Agent repo 链接 + Show HN 链接(如已发)
+
+### PR description 草稿
+
+```markdown
+## Add Test-Agent · Agent-Native Testing Framework example
+
+This PR contributes two notebooks demonstrating how to build a
+testing-oriented agent team with Claude Code:
+
+1. `agent-orchestration-pattern.ipynb` — test-lead pattern for
+ coordinating 8+ specialized agents based on PRD keyword routing
+2. `skill-composition.ipynb` — composing smoke / regression / coordinator
+ skills with shared quality-gate state
+
+Both are derived from Test-Agent (open source, MIT):
+github.com/Wool-xing/Test-Agent
+
+Patterns shown:
+- Multi-agent orchestration with Claude as the bus
+- Layered quality gates as decision input to the agent
+- MCP filesystem channel + SDK direct calls (vs. all-MCP approach)
+- Auto-routing PRD format detection (md/pdf/docx/exe/apk/...)
+
+Open to feedback on the structure / which patterns to highlight more.
+```
+
+---
+
+## 时机选择
+
+| 渠道 | 何时发 | 为何 |
+|------|--------|------|
+| X DM Alex | **博客 + Show HN 发完后 24h 内** | 让他能看到外部对项目的反应(不是冷启动 spam) |
+| LinkedIn Alex + Eric | X DM 后 48h,无回复时 | 不要并发轰炸 |
+| Cookbook PR | **博客发完同时**,作为"成熟度证据" | 写 PR description 时引用博客 |
+
+**关键原则**:先有外部声量(博客 + HN 评论 + 几 star),再敲 Anthropic 门。否则是冷启动 spam。
+
+---
+
+## 不要做的事
+
+- ❌ 不要在 Twitter 公开 @Anthropic 喊话求 endorsement(廉价感)
+- ❌ 不要在多个渠道 24h 内同时发(被认为 spam)
+- ❌ 不要在邮件/DM 里写"我们"(除非有 co-maintainer)
+- ❌ 不要假装是合作伙伴(不诚实,被发现毁牌)
+- ❌ 不要在 DM 里写超过 280 字符(短才有回复率)
+- ❌ 不要发完没回复就再发(等 1 周再轻轻 follow up)
+- ❌ 不要把 Anthropic 提及作为 README 主卖点(喧宾夺主)
+
+---
+
+## 期望管理(你心理预期)
+
+- 50% 概率:完全没回复(DevRel inbox 巨量)→ 正常,1 周后 follow-up 1 次即可
+- 30% 概率:有回复,但只是话术化的感谢("thanks for sharing")→ 仍是成功,后续可以"我们更新了 X,您有兴趣..."
+- 15% 概率:进一步对话或邀约 chat → 这是大成功,准备好 30 min 内容
+- 5% 概率:Cookbook 合入 / Featured Blog → 跑路线图 Phase 4 决策门 G4
+
+**6 个月内只要出现上述后三种情形中的任意一种(即收到任何形式的回应)= T3 调整阀 B 不触发**(不需要"急转直下"行动)。
+
+---
+
+## Follow-up 节奏(如无回复)
+
+- Day 0:发首发 DM / PR
+- Day 7:如无回复,**X 主页发一条 tweet**(不是 @Anthropic,是 natural post,即自然发帖)展示 Test-Agent 新进展
+- Day 14:如无回复,发 LinkedIn InMail(不同渠道)
+- Day 30:如无回复,**写一篇博客**"What I learned building Claude Code agents"——客观技术分享,无任何索要
+- Day 60:如无回复,参加 Anthropic Discord / 任何在线活动,**自然接触**(不主动 DM 同一人)
+- Day 90:如无回复,T3 阀 B 触发,重审"Anthropic 生态卡位"是否仍是 Phase 4 目标
+
+---
+
+## 一句话哲学
+
+> 主动联系 Anthropic 不是为了"被 Anthropic 看到",是为了"让 Anthropic 觉得我们在帮他们做事"。后者会主动 reach out,前者不会。
+>
+>
diff --git a/docs/history/2026-5-12 SURVEY 035435.md b/docs/history/2026-5-12 SURVEY 035435.md
new file mode 100644
index 0000000..2c57940
--- /dev/null
+++ b/docs/history/2026-5-12 SURVEY 035435.md
@@ -0,0 +1,138 @@
+# 用户调研问卷(2026-05 · V1.8 发布前)
+
+> 目标:100 用户中收回 ≥ 30 份。决定 W3 砍/留 + W4 路线方向。
+> **匿名**:无真名 / 真邮箱 / IP;只问行为 + 反馈。
+> 部署:Google Forms / 飞书表单 / 腾讯问卷;**5 分钟答完**。
+
+---
+
+## 第 1 部分 · 你是谁(2 题)
+
+### 1.1 你的主要角色是?(单选)
+- [ ] QA 工程师
+- [ ] 测试开发(SDET)
+- [ ] 安全研究员 / 渗透测试
+- [ ] 后端 / 全栈开发
+- [ ] DevOps / SRE
+- [ ] 车载 / 嵌入式
+- [ ] AI / ML 工程师
+- [ ] 学生 / 自学者
+- [ ] 其他:____
+
+### 1.2 团队规模?(单选)
+- [ ] 独狼(我自己)
+- [ ] 2-5 人小团队
+- [ ] 6-20 人中型
+- [ ] 21-100 人
+- [ ] 100+ 大厂
+
+---
+
+## 第 2 部分 · 你怎么用 Test-Agent(5 题)
+
+### 2.1 你**最常用的 3 个 Skill** 是?(多选 ≤ 3)
+- [ ] /smoke-test (冒烟)
+- [ ] /test-coordinator (完整流程)
+- [ ] /regression-test (回归 + JMeter)
+- [ ] /testcase-design (用例 Excel)
+- [ ] /python-script-gen (pytest 脚本)
+- [ ] /jmeter-script-gen (JMeter)
+- [ ] /data-preparation (数据准备)
+- [ ] /zentao-bug-submission (Bug 提单)
+- [ ] /mobile-test (移动端)
+- [ ] /desktop-test (桌面)
+- [ ] /visual-test (视觉/游戏)
+- [ ] /system-test (IoT/音视频/MQ)
+- [ ] /ai-test (AI 模型)
+- [ ] /pentest-coordinator (渗透总编排)
+- [ ] /pentest-web (Web 渗透)
+- [ ] /pentest-api (API 渗透)
+- [ ] /automotive-test (车载主)
+- [ ] /tdd-workflow (TDD)
+- [ ] /verification-loop (5-phase verify)
+- [ ] /e2e-testing (E2E)
+- [ ] /eval-harness (eval 框架)
+- [ ] /security-review (安全 review)
+- [ ] /build-your-own-x-explorer (教学引导)
+- [ ] /darwin-skill (skill 自进化)
+- [ ] /karpathy-guidelines (LLM 写代码)
+- [ ] /agent-introspection-debugging (agent 调试)
+
+### 2.2 你**从未用过**的 Skill?(多选,实话)
+(选项同 2.1,请勾选你从未使用过的 Skill)
+
+### 2.3 平均每周用 Test-Agent 多少次?
+- [ ] >10 次(重度)
+- [ ] 3-10 次(中度)
+- [ ] 1-2 次(轻度)
+- [ ] 几乎不用了(为什么? ____)
+
+### 2.4 你用什么 LLM?(多选)
+- [ ] Claude (Anthropic)
+- [ ] GPT-4 / GPT-4o (OpenAI)
+- [ ] Gemini (Google)
+- [ ] Qwen (阿里 DashScope)
+- [ ] DeepSeek
+- [ ] GLM (智谱)
+- [ ] Kimi (Moonshot)
+- [ ] 本地(Ollama / llama.cpp)
+- [ ] 其他:____
+
+### 2.5 你主要测什么?(多选)
+- [ ] Web 应用
+- [ ] REST API / GraphQL
+- [ ] 移动 App(Android/iOS/小程序)
+- [ ] 桌面应用(EXE/.app/Electron)
+- [ ] AI / LLM 应用
+- [ ] 嵌入式 / IoT
+- [ ] 车载 / 自动驾驶
+- [ ] 区块链 / Web3
+- [ ] 安全 / 渗透
+- [ ] 性能 / 压测
+- [ ] 其他:____
+
+---
+
+## 第 3 部分 · 痛点 + 改进(3 题)
+
+### 3.1 你**最不满意**的 Top 3 痛点?(开放)
+1. ____
+2. ____
+3. ____
+
+### 3.2 你**最希望加什么功能**?(开放,1 句)
+____
+
+### 3.3 你**最希望砍什么功能**?(开放,如有)
+____
+
+---
+
+## 第 4 部分 · 推荐 + 合作(2 题)
+
+### 4.1 你会推荐 Test-Agent 给同事 / 朋友吗?(NPS)
+1 = 完全不会 ··········· 10 = 强烈推荐
+
+### 4.2 愿意被进一步访谈?(15 分钟,匿名 OK)
+- [ ] 愿意(留邮箱或 Telegram/微信,**联系方式仅保留 30 天,到期删除**)
+- [ ] 不愿
+
+---
+
+## 数据处理
+
+- 收集后 30 天内分析完毕
+- 原始数据 30 天后删除
+- 汇总数据(无 PII)留存 ≤ 2 年用于产品迭代
+- 不与第三方分享
+- 详见 [PRIVACY.md](PRIVACY.md)(如有,否则参考 GDPR 第 5 条最小化原则)
+
+## 投放策略
+
+| 渠道 | 何时 | 预期回收 |
+|------|------|---------|
+| 项目内 banner(登录后弹) | W1 立即 | 30-50% |
+| Discord 公告 | W1 | 10-20% |
+| 邮件(留过邮箱的用户) | W2 | 15-25% |
+| 微信群 / Telegram 群 | W2 | 20-30% |
+| **目标** | W3 末 | **≥ 30 份** |
diff --git "a/docs/history/2026-5-14 \345\256\236\344\276\213\345\214\226\346\250\241\346\235\277 042802.md" "b/docs/history/2026-5-14 \345\256\236\344\276\213\345\214\226\346\250\241\346\235\277 042802.md"
new file mode 100644
index 0000000..b8f5b9a
--- /dev/null
+++ "b/docs/history/2026-5-14 \345\256\236\344\276\213\345\214\226\346\250\241\346\235\277 042802.md"
@@ -0,0 +1,231 @@
+# 协作宪章实例化模板
+
+> **用途**:指导任一项目把 `通用项目协作宪章.md` 实例化到项目根目录的 `00-协作宪章.md`。
+>
+> **关系**:通用宪章给骨架 + 占位符;本模板给填写指南 + 两个范例(TG 云盘 / Test-Agent)。
+>
+> **何时用**:新项目第一次落地协作宪章 / 现有项目改造为继承通用宪章。
+
+---
+
+## §1 · 实例化文件骨架
+
+把下面这块复制到项目根目录的 `00-协作宪章.md`,按各段提示填空:
+
+```markdown
+# [项目名] 协作宪章(实例化)
+
+> 本宪章继承自 [通用项目协作宪章.md](路径)。本文件**仅填项目特化层**——通用机制不复述,按 §X 引用元宪章。
+>
+> **生效日期**:YYYY-MM-DD
+> **版本**:v1
+> **继承自**:通用项目协作宪章 v5
+
+---
+
+## §0 · 双北极星(项目特化)
+
+### §0a · 落地准则
+[填: 项目最终交付物 = 用户___后跟着___走, 能在___内 ___。]
+[反向约束直接继承通用宪章 §0a, 不复述。]
+
+### §0b · 质量准则
+[填: 项目质量准则名 (常见: 安全准则 / 诚实准则 / 兼容准则)。]
+[四层覆盖填项目特化内容:]
+- **静态层**:[禁内容定义]
+- **运行层**:[禁字段定义]
+- **架构层**:[默认值要求]
+- **流程层**:[每 fix 扫描的违反面定义]
+
+### 修复优先级(项目特化)
+[填: 三尺子各自的项目内语义 + 分级表]
+
+---
+
+## §1.3 · 六道闸文档语义(项目特化)
+
+[每项目按机制 1.3 表格填 a/c/d/f 闸的项目内具体语义。b/e 闸通用直接继承。**f5 / f6 元规则跨项目通用强制,本节不需重声明**(自动从通用底座 §1.3 继承)。本节只填 f1-fN 项目特化子项。]
+
+| 闸 | 项目特化定义 |
+|---|---|
+| a · 静态 | [项目内 lint/check 工具] |
+| c · 跨层契约 | [项目内跨层一致性检查范围] |
+| d · 实测 | [项目内实测方式] |
+| f · 0b 自检 | [项目特化 f1-fN:对应 §0b 四层的 fix 时扫描清单。f5/f6 自动继承元规则。] |
+
+---
+
+## §2 · 反馈模板占位符取值
+
+[列出通用宪章 §2 反馈模板里的每个 `{{项目特化: ...}}` 在本项目的具体值]
+
+| 占位符 | 本项目取值 |
+|---|---|
+| `{{项目特化: a 静态}}` | [例: tsc --noEmit] |
+| `{{项目特化: c 跨层契约}}` | [例: README ↔ NOTICE ↔ SECURITY 一致] |
+| `{{项目特化: d 实测}}` | [例: pytest 跑通] |
+| `{{项目特化: f 0b 自检}}` | [例: 凭据 / 日志 / URL / 默认值 四子项] |
+| `{{项目特化视角}}` | [例: 攻击者视角 / 数字证据视角 / 用户视角] |
+
+---
+
+## §4 · 角色分离 · 本项目交付物清单
+
+**禁引用宪章愿景话术的交付物**:
+- [列出项目内所有面向用户的交付物文件]
+
+**禁话术清单**(例):
+- "我们追求 ___ 主义"
+- "三公理 / 五条铭文"等内部哲学
+- 项目内部代号 / 章节编号(用户看不到)
+
+---
+
+## §5 · 修复优先级算法 · 本项目尺子
+
+通用公式:`priority = max(P_严重度, 0a_落地链卡点权重, 0b_违反严重度权重)`
+
+本项目分级:
+- **P 严重度**:[P0/P1/P2/P3 定义]
+- **0a 卡点权重**:[分级定义]
+- **0b 违反严重度权重**:[分级定义]
+
+---
+
+## §6 · 本项目特化红线
+
+[追加项目特化红线,例:]
+- [项目特有的"不能做"清单]
+
+通用红线(凭据零写入 / 反馈不造假 / 用户未拍板不擅自推进)直接继承。
+
+---
+
+## §7 · 项目文件清单
+
+| 文件 | 用途 | 谁更新 |
+|---|---|---|
+| 本文件 | 项目实例化层 | 用户/Claude 共同决策 |
+| [进度表文件] | 全部问题清单 + 修复进度 | 每次修完 Claude 更新 |
+| [灵感笔记文件] | 项目内灵感沉淀 | Claude 随时写, 用户审 |
+| ... | ... | ... |
+```
+
+---
+
+## §2 · 范例 A · TG 云盘(运行型项目)
+
+> TG 云盘 = web 应用. 0b 选**安全准则**. 已有 v4.1 宪章, 这里展示如何迁移到通用宪章实例化层.
+
+### §0a · 落地准则
+> **新手 clone 后跟着 `启动指南.md` 走,能在 30 分钟内部署成功、登录、上传、下载、分享。**
+
+### §0b · 安全准则
+- **静态层**:仓库 / 代码 / 配置文件永远不含真实凭据、密钥、token、个人信息
+- **运行层**:日志 / 错误响应 / API 响应 / URL query / 邮件正文不能泄露敏感字段
+- **架构层**:默认配置即安全(默认密码强校验、HTTPS 默认开、敏感端点默认关、限流默认生效)
+- **流程层**:每个 fix 完成后扫描"是否引入新泄露面"
+
+### 修复优先级 · 攻击者得手难度
+- **zero knowledge**(不需任何前置信息就能利用,如默认密码、Swagger 生产暴露)→ 最优先
+- **passive observation**(被动嗅探即可,如 HTTPS 未开、token 走 URL query)→ 次优先
+- **active scan + low skill**(需扫描但门槛低,如未限流登录爆破)→ 再次
+- **need credential / insider**(需已有凭据,如越权访问)→ 一般
+
+### §1.3 六道闸 TG 取值
+
+| 闸 | TG 取值 |
+|---|---|
+| a · 静态 | `tsc --noEmit` / Read 复查 |
+| c · 跨层契约 | 前端 / Worker / Nginx / DB 四处对齐 |
+| d · 实测 | 能跑 / 复现修前 bug 失败 / 修后 bug 不再复现 |
+| f · 0b 自检 | f1 凭据扫描 / f2 日志/响应字段 / f3 URL query / f4 默认值审计 + **f5/f6 元规则自动继承(置信度标注 + 假阳性过滤)** |
+
+### §2 反馈模板占位符(TG)
+| 占位符 | TG 取值 |
+|---|---|
+| `{{项目特化视角}}` | 攻击者视角(修前: 攻击者能 X;修后: 难度上升到 Y) |
+
+### §4 交付物清单(TG)
+- `README.md` / `启动指南.md` / `LICENSE` / `代码审查报告.md` 公开摘要
+- 禁话术: "三公理" / "灵魂底色"等内部哲学
+
+### §6 红线(TG 追加)
+- 凭据零硬编码到代码 / 配置 / commit message
+- 数据库 schema 改动必须有 rollback 脚本
+
+---
+
+## §3 · 范例 B · Test-Agent(文档/框架型项目)
+
+> Test-Agent = 测试 Agent 框架 + 文档. 0b 选**诚实准则**(不选安全, 因无运行时攻击面).
+
+### §0a · 落地准则(双尺)
+- **0a-1 短期**:新手 30 分钟内 clone + install + `tagent demo` 跑通
+- **0a-2 终态**:4 层闭环 + agent 协作 + skill 调用 + 真 LLM-driven agent 跑通
+
+### §0b · 诚实准则
+- **静态层**:文档零虚假承诺 / 零路径泄漏 / 零未脱敏 PII / 零未实测数字
+- **运行层**:跑命令产出的日志 / 报告 / 反馈不含未脱敏路径 / 不抄文档当实测
+- **架构层**:默认 README / NOTICE / SECURITY 不引用 FULL_GUIDE 愿景话术(角色分离)
+- **流程层**:每个 fix 完成后扫描"是否引入新虚假承诺 / 数字膨胀 / 法律风险 / 路径泄漏"
+
+### 修复优先级 · 暴露面 × 引用辐射 × 用户可见度
+- **finding 严重度 × 用户可见度**:README / 启动指南 / NOTICE 级 > 02-专家 / 03-技能级 > 内部 INDEX 级
+- **引用辐射数**:grep 出多少文件引用该 finding 所在节
+- **0b 维度优先**:虚假承诺 > 数字误差 > 路径泄漏 > 法律措辞瑕疵
+
+### §1.3 六道闸 Test-Agent 取值
+
+| 闸 | Test-Agent 取值 |
+|---|---|
+| a · 静态 | markdown lint + 死链检查 + 中英文混排格式 |
+| c · 跨层契约 | README ↔ NOTICE ↔ SECURITY ↔ FULL_GUIDE ↔ 00-项目导航 一致性 |
+| d · 实测 | **真跑命令验证, 不抄文档**(如声明"30 秒启动" → 实测计时) |
+| f · 0b 自检 | f1 数字带证据 / f2 路径不漏 / f3 虚假承诺不回潜 / f4 法律措辞合规 + **f5/f6 元规则自动继承(置信度标注 + 假阳性过滤,Test-Agent v3 PR #42 落地)** |
+
+### §2 反馈模板占位符(Test-Agent)
+| 占位符 | Test-Agent 取值 |
+|---|---|
+| `{{项目特化视角}}` | 数字证据视角(修前: 数字 X 无来源 / 来自虚报;修后: 数字 X 实测于 commit Y) |
+
+### §4 交付物清单(Test-Agent)
+- `README.md` / `NOTICE.md` / `SECURITY.md` / `CODE_OF_CONDUCT.md` / `CONTRIBUTING.md` / `LICENSE`
+- `docs/getting-started/*` / `启动指南` 类
+- 禁话术: "九大簇" / "灵魂底色" / "五条铭文" / "三公理" 等 FULL_GUIDE 内部愿景
+
+### §6 红线(Test-Agent 追加)
+- **零虚假承诺**:未实测的数字 / 未跑通的 demo / 未实现的功能, 不写"已支持"
+- **零路径泄漏**:私域路径(如 `D:\项目文件\_精髓库\` 等)不进 public 仓库文档
+- **HANDOFF 必带**:会话结束未完事项必交班, 不假装完成
+
+---
+
+## §4 · 实例化校验清单
+
+新项目 `00-协作宪章.md` 写完, 自检以下 6 项:
+
+- [ ] §0a 落地准则有**一句话**判断标准, 可被 fix 反馈引用
+- [ ] §0b 质量准则名明确(安全 / 诚实 / 兼容 / 其他)+ 四层覆盖每层有具体内容
+- [ ] §1.3 六道闸 a/c/d/f 都有项目内具体工具或检查方式(不写"按通用宪章"敷衍)
+- [ ] §2 反馈模板每个占位符在本文件能查到取值
+- [ ] §4 交付物清单列出至少 3 个文件 + 禁话术举例
+- [ ] §5 修复优先级三尺子分级表完整, 不留 TODO
+
+校验全过 → 项目实例化完成, 可正式启用本宪章协作。
+
+---
+
+## §5 · 升级元宪章时的同步规则
+
+通用宪章升 v6+ 时:
+- 列改动清单(哪些段/机制变了)
+- 所有项目实例化文件**逐个 review**: 项目特化层是否需跟改
+- 不需要跟改的项目, 在自己 `00-协作宪章.md` 头部声明"继承自 通用宪章 v5"(锁版本)
+- 需跟改的项目, 更新 `00-协作宪章.md` 头部到新版本号 + 改对应特化层
+
+**版本沿革**:
+
+| 版本 | 日期 | 变更 |
+|---|---|---|
+| v1 | 2026-05-13 | 首版 · 配合通用项目协作宪章 v5 |
diff --git "a/docs/history/\346\267\261\345\272\246\345\257\271\346\212\227\345\256\241\346\237\245\346\212\245\345\221\212_2026-05-12.md" "b/docs/history/\346\267\261\345\272\246\345\257\271\346\212\227\345\256\241\346\237\245\346\212\245\345\221\212_2026-05-12.md"
new file mode 100644
index 0000000..e473da5
--- /dev/null
+++ "b/docs/history/\346\267\261\345\272\246\345\257\271\346\212\227\345\256\241\346\237\245\346\212\245\345\221\212_2026-05-12.md"
@@ -0,0 +1,577 @@
+# Test-Agent 项目 零容忍 深度对抗审查报告 (V2)
+
+**审查日期**:2026-05-12
+**审查对象**:`D:\项目文件\Test-Agent工作流搭建\` (HEAD, working tree, VERSION = `1.14.0-alpha`)
+**仓库**:https://github.com/Wool-xing/Test-Agent
+**审查方式**:在 baseline 报告 `Test-Agent-审查报告_2026-05-12.md` (572 行 / 20 问) 基础上,8 路并行对抗子代理 + Windows MSYS 真机 install.sh 复现 + GitHub License API 上游核实 + runtime/ 22 子目录(baseline 称 "29 子模块")逐文件审。
+**子报告归档**:`C:\Users\admin\Downloads\TA-deepreview-2026-05-12\SA1..SA8-*.md`
+
+---
+
+## 0. 一段读完(总评 V2)
+
+baseline 的 "两个项目缝在一起未对齐" 大判断**站得住**,但承诺-交付裂口比 baseline 描述**更大、更深、更早**:
+
+1. **30 秒 hero CTA 命令就错** — README 头条 `git clone … && cd Test-Agent && pip install -e .` 跑不通。仓库根目录**没有 `pyproject.toml` 也没有 `setup.py`**,唯一的 Python 项目声明在 `runtime/pyproject.toml`。比 baseline Q3 "runtime 不被 install.sh 装" 更直接打脸——连最简 `pip install -e .` 都失败。新用户 5 秒报错。
+2. **作者内部已自承 6 个 agent 是 no-op** — `discussions/HANDOFF_2026-05-12_V1.14.md` 自表 `5 真 LLM + 5 SCRIPT + 6 no-op = 16 expert`,并写明 "不要直接上 HN,HN 会刨根问底 16 个 agent 是真 LLM 还是 mock"。但 README 头条仍宣称 "16 expert agents · 32+ reusable skills · MCP-native · Self-test 100%"。**内部诚实 vs 对外营销公然分裂入仓**。
+3. **runtime/ 1/3 代码是橱窗** — `scheduler / backends / learning_loop / gateway` 共 31 文件 / 1627 LOC 在 `runtime/cli/` `runtime/api/` 全文 0 命中,纯悬空层。`tagent.yml.example` 配了用户也调不到。即使按 baseline Q3 的 B 选项 `pip install -e runtime/`,这四个模块仍是 dead inventory。
+4. **3 个上游仓库(对应 NOTICE 4 处条目)虚假 MIT/CC0 + 1 处 TODO + 武器化代码零授权墙** — `darwin-skill` / `karpathy-guidelines/karpathy-skills` / `build-your-own-x` 上游 GitHub License API 实测全部 `null`(无 LICENSE 文件),NOTICE.md 标为 MIT/CC0 是虚假归属。pentest skill + ai_adversarial.py + api_security_scanner.py 三处武器化代码在 SECURITY.md / README / NOTICE 内**完全无授权警告**。
+5. **baseline 自身有 5 处需要订正** — baseline 在没有用 `git ls-files` 验证的几条上误判:Q4 `test-project/` 不在工作树(不存在);Q5 `workspace/` 0 tracked;Q13.1 `runtime/mcp/test_orchestrator/server.py` 真实存在且 6 件套全齐;Q13.3 Jenkins triple-quote `"""...'...'..."""` 是正确写法;Q14:50 trap rm 是 correct 模式(baseline 误读 dirname)。Q10 "撞 anthropics/skills 官方同名" 在官方仓 0/4 命中,真实风险是撞 Claude Code 内置 slash command `/security-review` + 3 处伪造 ECC 派生声明。
+
+修复决策的真问题仍然是 baseline 末段的架构决策点(A/B/C 选哪个),但**先决条件**是 README + FULL_GUIDE 必须先撤回 "production runtime / Self-test 100% / 16 expert agents" 这一类话术,否则技术线怎么修都填不上信任沟。
+
+---
+
+## 1. 数据校准表(实测 vs 基线 vs README)
+
+| 维度 | 实测(2026-05-12) | baseline 说 | README/FULL_GUIDE 说 | 备注 |
+|---|---|---|---|---|
+| agents/ 业务 agent .md | **16** | "16" 或局部 "14" | "16 expert agents" | README 准;FULL_GUIDE 第 33/137-179/779/980/1093/1191/1195-96/1220/1247 行均写 "14";00-项目导航.md 第 263 行写 "14";CONTRIBUTING.md:113 写 "14";install.sh:101-102 硬编码 14 |
+| skills/ skill | **33 顶层 .md + 2 子目录** (`darwin-skill/`, `karpathy-guidelines/`) | "32 + 2 子目录" | "32+ reusable skills" (README:42) / "34 skills" (README:100) | 同一份 README 内 32 ↔ 34 自打架;CHANGELOG V1.7 写 32(漏 karpathy = 应是 33) |
+| utils/ utils .py | **49** (含 `__init__.py`) = 48 业务 + 1 init | 49 | 49 | 准 |
+| runtime/ 子目录 | **22 包**:api/backends/cli/config/essence_watcher/exporters/gateway/healthcheck/init/learning_loop/marketplace/mcp/observability/orchestrator/registry/router/scheduler/storage/subagent/tests/tutor/web | "29 子模块" | "runtime V1.1.0-alpha 新增" | runtime/INDEX.md 把 runtime 写得像 V1.1 终版 |
+| Python project 根级 `pyproject.toml` | **不存在** | 未点 | README:18 `cd Test-Agent && pip install -e .` | **命令必败**(新增致命发现) |
+| `runtime/pyproject.toml` version | **`1.1.0a0`** | 未点 | VERSION = `1.14.0-alpha` | 项目自身 2 个版本号差 13 个 alpha |
+| examples/ tracked files | **10** | "2457 文件污染" | — | baseline 误判 — 2447 delta 全是 `.venv/__pycache__/.pytest_cache`,被 `.gitignore` 拦下 |
+| test-project/ 在工作树 | **不存在** (`git ls-files` 0 命中) | "tracked + 双源" | — | baseline 误判 — 那是 install.sh 输出目录,没入仓 |
+| workspace/ tracked | **0** | "tracked 干扰源" | — | baseline 误判 — 81 个本地文件全未入仓 |
+| compliance profiles 实跑 | **0/10 functional**(10/10 `status: skeleton`) | "skeleton" | "10 行业合规预置" | README/00-导航 应改 "10 skeleton placeholders" |
+| MCP servers 实启用 | **1/6** (`.mcp.json:14` 只启 test_orchestrator;其余 5 件套写在 `_pending_servers_v1_2_0_alpha`) | "5 mcp 不存在" | "MCP-native 6-server suite" | baseline 误判 — 6 个 server 文件全在;运行层只启 1 件套 |
+| BugTracker adapters 实装 | **1** (`zentao_bug_manager.py`) | — | README:46 "6 BugTracker adapters — Zentao · Jira · GitHub · GitLab · Linear · Webhook" | 5 个广告未交付 |
+| expert 真/假分布 | **5 真 LLM + 5 SCRIPT + 6 no-op** = 16 (HANDOFF 自承) | "16 expert" | "16 expert agents" | env / mobile / visual / system / 渗透 / 车载 = no-op |
+| selftest 通过率 | 9/9 PASS **但 stub path** | "9/9 PASS 是真" | README badge "Self-test 100%" | V1.13/V1.14 selftest log 中部含 `ModuleNotFoundError: psycopg`,被 try/except 兜住后走 stub,"100%" 是骨架通不是内涵通 |
+
+---
+
+## 2. baseline 裁决总表
+
+| baseline 章 | Confirmed | Partial | Refuted | Extended | 备注 |
+|---|---|---|---|---|---|
+| Q1 install.sh 列表残缺 | ✅ | — | — | +"无 README.md 拷贝" | 16 agent 漏 2 + 33 skill 漏 19 + 2 子目录漏;install.sh:102 字面 `for d in 01..14` |
+| Q2 V1.0.0 vs 1.14.0-alpha | ✅ | — | — | +"runtime pyproject 1.1.0a0 是第三个版本号" | `install.sh:15` / `FULL_GUIDE.md:9-10,1003,1247` / `Test-Agent工作流搭建.md:1` 全卡 V1.0;CONTRIBUTING.md:128 自定铁律自破 |
+| Q3 runtime 不部署 | ✅ | — | — | +"scheduler/backends/learning_loop/gateway 4 个悬空层,装了也调不到" | `runtime/cli/` `runtime/api/` 全文 grep 4 模块 0 命中 |
+| Q4 test-project/ 入仓 | — | — | ❌ | — | 工作树不存在 + `git ls-files` 0 命中。**baseline 误判** |
+| Q5 workspace/ 产物入仓 | — | ⚠️ | ❌("入仓" 部分) | — | 0 tracked。`.gitignore` 已拦 9 成;残留风险:`workspace/_init_smoke/`、`testcases_sample.xlsx`、`_smoke_plan.json` 未匹配 |
+| Q6.1-Q6.4 4 个代码 bug | ✅✅✅✅ | — | — | +"mq_helper 还能用 ruff 加 unreachable-after-return 防回" | `mobile_driver.py:88` 三元死分支;`mq_helper.py:60-69` 不可达 break;`push_test.py:50-68` HTTP/1.1 APNs(行内注释自承 "实际生产用 hyper / httpx HTTP/2");`media_validator.py:66` import 路径不一致 |
+| Q7 渗透授权零纵深 | ✅ | — | — | +"`pentest-exploit.md:14` 引用 `runtime/backends/docker.py` 但 runtime 不被 install" | 15/pentest-*/automotive-* skill 全部 `tools: Bash`,授权检查仅在 markdown prose;Claude Code skill loader 不读 tagent.yml |
+| Q8.1-Q8.9 9 项安全 | ✅✅✅✅✅✅✅✅✅ | — | — | +3 项新(见 SA3 NEW-1/2/3) | SQL 注入 / XSS / webhook 注入 / chaos sudo / API key URL / 明文密码 / SSRF probe / jailbreak templates / DSN 不 urlencode / p8 路径 |
+| Q9 数字六重不一致 | ✅ | — | — | +"实际 11 重不一致" | 13 处文档全部命中;新增:CHANGELOG V1.6 vs V1.7 自身算术互不自洽 (14+7+5=26 vs 14+7+5+6+1=33);runtime/INDEX.md 也写 14/13;02-/03-/04-README 全错 |
+| Q10 撞官方同名 | — | ⚠️ | ❌ (撞 anthropics/skills 部分) | +"撞 Claude Code 内置 `/security-review` + 3 处伪造 ECC 派生声明" | 实测 `anthropics/skills/skills/` 17 个 skill,**0/4 命中**;真风险是 `/security-review` 内置 slash + 3 处 frontmatter "派生自 ECC 同名 skill" 上游 ECC 不存在 |
+| Q11.1 karpathy-guidelines | ✅ + 加重 | — | — | +"上游 GitHub License API 实测 null(不仅 LICENSE-UPSTREAM 空)" | LICENSE-UPSTREAM 0 字节;上游 `forrestchang/andrej-karpathy-skills` 顶层无 LICENSE 文件,gh api `license: null` |
+| Q11.2 darwin-skill | ✅ + 加重 | — | — | +"上游 alchaincyf/darwin-skill GitHub License 字段 = null(仅 README 口头声明 MIT)" | 子目录无 LICENSE/NOTICE;上游 README 底部口头 MIT + badge 404 |
+| Q12.1 NOTICE 占位 | ✅ + 加重 | — | — | +"5 处虚假标注,不止 gbrain 一处" | darwin-skill / karpathy-guidelines / karpathy-skills / build-your-own-x = 4 处 MIT/CC0 虚假 + gbrain 1 处 TODO |
+| Q12.2 CHANGELOG | ✅ | — | — | +"V1.6 vs V1.7 段内部互锁矛盾" | 357 行才 [1.0.0];前 356 行全 [Unreleased];Added/Fixed/Changed 子节混用 |
+| Q12.3 法律线无血 | ✅ | — | — | — | LICENSE / SECURITY URL / COC 占位 / VERSION 全填实 |
+| Q13.1 .mcp.json | ⚠️ (一半) | — | ❌ (server 不存在 这条) | — | `${PROJECT_ROOT:-}` 语法 ✅ 不展开;但 `runtime/mcp/test_orchestrator/server.py` 真实存在(204 LOC,5 工具),baseline Q13.1 后半判错。真问题:server.py:4 docstring `list 14 experts + 14 skills` 过时 |
+| Q13.2 github-actions | ⚠️ | — | — | +"分支名 markdown 注入 + GH Pages action 弃用 + Allure action Node 16" | 行号 :390 / :443/452,事实方向对;pip cache 论据 partial |
+| Q13.3 jenkins | — | — | ❌ (单引号说法) | +"Jenkins console webhook URL 明文落 console,凭据泄漏" | Groovy `"""..."""` 是 GString 会插值,内层 sh single-quote 防二次展开是正确写法;但 `${WECHAT_WEBHOOK}` 经插值后写入 sh 命令字符串,Jenkins console 默认有读权用户均可见 |
+| Q13.4 conftest | ⚠️ | — | — | — | `from data_factory import` 路径风险 ✅;autouse fixture 实际 `logger.warning(...)` 不静默;pytest.ini 中文路径 ✅ |
+| Q13.5 requirements | ✅ | — | — | — | mss>=7.0.1,<10 唯一范围版本 |
+| Q14 install.sh 自身 | ⚠️ | — | ❌ (trap rm 反模式说法) | +5 项新(curl-pipe 无 hash、npm 无 pin、Playwright sudo 卡 curl-pipe、Windows CJK locale clone)| trap rm `dirname $TEMPLATE_DIR` 删的是 mktemp 临时父目录,是 correct 模式;但 `mktemp` 返回空字符串边界条件下可能 rm -rf /,需加 `[[ "$TEMPLATE_DIR" == /tmp/* ]]` 守 |
+| Q15 16 专家半成品 | ✅ | — | — | +"_精髓库 私有路径泄漏跨 15 个文件" | 15-渗透/16-车载 < 5KB;非标 frontmatter;测试主管路由表全无 15/16;新增:`D:/项目文件/_精髓库/` 路径同时出现在 `00-项目导航.md` / `runtime/scheduler/__init__.py` / `runtime/subagent/__init__.py` / `CHANGELOG.md` / `NOTICE.md` 等 **15 个文件** |
+| Q16 marketplace 空壳 | ✅ + 加重 | — | — | +"4 个 lane 目录根本不存在(不只是空)" | `.claude-plugin/marketplace.json` 缺;registry.json `entries: []`;profiles/compliance/ 10 YAML 全 status: skeleton |
+| Q17 周边不部署 | ✅ | — | — | +"examples/ tracked = 10 (clean); docs/ orphan 45 (zero refs); scripts/ git-pre-tag.sh 0 wiring" | install.sh grep `examples\|discussions\|docs\|scripts\|marketplace\|profiles\|runtime` 唯 1 命中(workspace 内 scripts) |
+| Q18 utils 代码质量 | ✅ (26/29) | ⚠️ 3 | ❌ 1 (conftest 静默说法) | +11 新(SA3 NEW-1..NEW-11) | conftest 实际有 `logger.warning`;datetime.utcnow 仅 2 处不是 "多处" |
+| Q19 skill 命名 | ✅ | — | — | +"YAML 引号风格不一" | 命名混乱 ✅;中英文混杂 ✅(实测 33 中 + 2 英 = 33/2,baseline 漏数 1) |
+| Q20 杂项 | ✅ 5 + ⚠️ 2 | ⚠️ | — | — | HANDOFF/COC 私密通道已有 → partial;其余 confirmed |
+
+**整体**:baseline 20 章中 **5 处误判(Q4 / Q5 / Q13.1 后半 / Q13.3 / Q14:50)**;5 章成立但严重度被低估;其余 12 章基本准确。**baseline 整体方向准、但 5 处需要订正**。
+
+---
+
+## 3. P0 致命问题(实测确认 + 新增)
+
+### P0-Q1 [致命] install.sh 硬编码列表 14/13,漏 2 agent + 19 skill + 2 子目录
+基线 Q1 确认。**install.sh:101-104** 字面 `for d in 01..14`;**:106-109** 字面 13 个 skill 名;**:108** 是 bare `cp` 无 `-r`。
+
+**漏掉**:
+- agent: `15-渗透测试.md`、`16-车载测试.md`
+- skill .md: 7 pentest-* + 5 automotive-* + 6 ECC + `build-your-own-x-explorer.md` = 19
+- skill 子目录: `darwin-skill/`(40+ 文件)、`karpathy-guidelines/`(SKILL.md + 0 字节 LICENSE-UPSTREAM)
+
+**修法**:`find "$TEMPLATE_DIR/agents" -maxdepth 1 -name '[0-9]*.md' -exec cp` + `rsync -a` 子目录。
+
+### P0-Q2 [致命] install.sh:15 banner V1.0.0 vs VERSION 1.14.0-alpha vs runtime/pyproject 1.1.0a0
+3 个版本号互不对齐,且 CONTRIBUTING.md:128 自定 "同步铁律" 自破。真机 reproduce 实测 stdout 第 2 行 = `Test-Agent 工作流一键部署 V1.0.0`。
+
+### P0-Q3 [致命/架构] runtime/ 不被 install.sh 装 + 1/3 是悬空层
+基线 Q3 确认 + 加重:即使 `pip install -e runtime/`,`runtime/scheduler / backends / learning_loop / gateway` 共 1627 LOC 仍无 CLI/API 消费者。`runtime/cli/main.py` 和 `runtime/api/` 全文 grep 这 4 模块 0 命中,唯一外部使用是 `marketplace/verifier.py:40` 用了 `scheduler.injection_scan` 一个函数。
+
+### P0-NEW-A [致命] README 头条 `pip install -e .` 命令必败
+README:18-23 / README.zh-CN.md:18-23 写 `cd Test-Agent && pip install -e . && tagent demo`。仓库根目录**没有 `pyproject.toml` 也没有 `setup.py`**。新用户 5 秒报错 `does not appear to be a Python project`。
+
+正确命令应是 `cd Test-Agent/runtime && pip install -e .`,但 README 没写。
+
+### P0-NEW-B [致命] HANDOFF 自承 6 no-op vs README "16 expert agents" 营销分裂
+`discussions/HANDOFF_2026-05-12_V1.14.md` 第 83-103 行明列:env / mobile / visual / system / 渗透 / 车载 = 6 个 no-op;第 149 行写 "不要直接上 HN" 因为 16 expert 实际只有 5 真。README 头条仍卖 16 expert agents · Self-test 100%。**内部诚实 vs 对外营销公然分裂入仓**。
+
+### P0-Q6.1-Q6.4 [致命] 4 个代码运行性断裂
+1. `utils\mobile_driver.py:88` — `hub_url = _resolve_hub_url() if not use_cloud else _resolve_hub_url()` 三元两支相同
+2. `utils\mq_helper.py:60-69` — `for msg in self.consumer: return ...; if time.time() > end: break` 不可达
+3. `utils\push_test.py:50-68` — APNs 强制 HTTP/2 但用 `requests`(HTTP/1.1),行内注释作者自承 "实际生产用 hyper / httpx HTTP/2;此处简化"
+4. `utils\media_validator.py:66` — `from utils.visual_helper import` 与全工程同级 import 风格不一致
+
+### P0-Q7 [致命/安全] 渗透 agent + pentest skill 授权零纵深
+- `agents\15-渗透测试.md:4` `tools: Read, Write, Edit, Bash, Grep, Glob`
+- `skills\pentest-coordinator.md:4` `pentest-exploit.md:4` 全 `Bash` 开
+- "授权检查" 仅 markdown prose `pentest-coordinator.md:20` 写 "读 tagent.yml: pentest.authorized: true",但 Claude Code skill loader **不读 tagent.yml**
+- `pentest-exploit.md:14` 引用 `runtime/backends/docker.py` 做沙盒,但 runtime 不被 install.sh 部署
+
+### P0-Q8 [致命/安全] 9 项已存在 + 4 项新增安全漏洞
+
+**baseline Q8.1-Q8.9 全部 confirmed**:
+- `db_test_helper.py:86,97,147,153` SQL/DB 名注入(4 处)
+- `email_sender.py:82-96` HTML 邮件 XSS
+- `generate_report.py:130-138/153-184/196-211` webhook markdown 注入(3 平台)
+- `chaos_helper.py:71-82/87-93/100-103/111` sudo iptables / sudo date / kill_by_name 子串杀 / kill_pod kube-system / 无 try/finally 回滚
+- `security_scanner.py:127-217` API key 入 URL path/query
+- `zentao_bug_manager.py:41,53` 明文密码 POST + `self.password` 永驻
+- `api_security_scanner.py:92-111` SSRF probe 默认含 169.254.169.254 + file:///etc/passwd
+- `ai_adversarial.py:104-116` JAILBREAK_PROMPTS / PROMPT_INJECTION_TEMPLATES 顶层常量
+- `data_factory.py:76-80` DSN 不 `quote_plus` 密码
+- `push_test.py:50` p8 路径不校验
+
+**SA3 新增 4 项 HIGH**:
+- `db_test_helper.py:147,153` CREATE/DROP DATABASE f-string `db` 参数(细化 baseline Q8.1)
+- `db_test_helper.py:86` 任意 SQL via `text(f"EXPLAIN {sql}")`
+- `desktop_driver.py:51-58` AppleScript 源码 f-string 拼 `app_name/menu/item` → AppleScript 注入逃逸到 `do shell script`
+- `chaos_helper.py:100-104` 网络分区无 try/finally,CI 被 Ctrl-C 后 iptables DROP 规则**永久残留**
+
+### P0-NEW-C [致命/runtime] backends/__init__.py REGISTRY 永空
+`runtime/backends/__init__.py` 只 import base,不 import `local/docker/ssh/...`。`get_backend("local")` 立即 `KeyError`。tagent.yml.example 配的 7 后端默认 import 路径全部不可用。
+
+**修法**:照 `gateway/platforms/__init__.py` 写法,在 `backends/__init__.py` 加 `from runtime.backends import local, docker, ssh, singularity, modal, daytona, vercel_sandbox`。
+
+### P0-NEW-D [致命/runtime] orchestrator `_upstream_outputs` 跨线程 race + RunnerResult.ok 永远 True
+- `runtime/orchestrator/adapters/experts.py:120` `_upstream_outputs: dict[str, dict] = {}` 模块全局,被 ThreadPoolExecutor 和 Prefect ConcurrentTaskRunner 并发读写,无锁。同 run 内并发 + 不同 run 都会 race。
+- `runtime/orchestrator/agents/base.py:91` `return RunnerResult(name=..., ok=True, ...)` 硬编码 True,即使 LLM 抛错走 mock fallback 也 ok=True。test-lead "上线决策" 看到全 ok 实际是 mock 数据,**主宪章 §10 "skin in the game" 被自己破**。
+
+### P0-NEW-E [致命/runtime] backends SSH MITM + 6 处 shell 拼接
+- `runtime/backends/ssh.py:29` `asyncssh.connect(..., known_hosts=None)` 关闭 host key 校验,教科书级 MITM
+- `runtime/backends/ssh.py:37,54` `f"cd {cwd} && {cmd}"` / `f"cat {path}"` — 远端命令注入
+- `runtime/backends/local.py:20` `create_subprocess_shell(cmd)` = `sh -c "$cmd"`
+- `runtime/backends/docker.py:42` / `singularity.py:40` / `daytona.py:46` / `modal.py:49` — `sh -lc {cmd}` 拼接面同
+- `runtime/backends/vercel_sandbox.py:41-46` — **API endpoint 是凭空捏造**(Vercel 公开 API 无 `/v1/sandboxes`),connect() 立即 404
+
+### P0-NEW-F [致命/runtime] 默认 LLM model ID 真调第一行 400
+`runtime/router/llm_client.py:13`:
+- `"claude": "anthropic/claude-sonnet-4-6"` ← 不是真实模型 ID
+- `"qwen": "openai/qwen-plus"` ← 应为 `dashscope/qwen-plus`
+
+用户默认 `llm_provider=claude` 启动 → LiteLLM 透传 Anthropic API 直接 400 `unknown_model`。
+
+### P0-NEW-G [致命/runtime] evidence_vault path traversal
+`runtime/mcp/evidence_vault/server.py:67-74` `tool_upload_evidence_path(run_id, kind, path)` — `path` 无 allowlist + 不限定到 `workspace_dir`,任何 MCP 客户端可读 `/etc/passwd`、`~/.ssh/id_rsa`、`~/.aws/credentials` 推送到 MinIO。同仓 `compliance_checker.tool_get_profile` 已有正确的 `Path.relative_to(base)` 模式,未应用于此。
+
+### P0-NEW-H [致命/runtime] api/main.py 进程级 env 状态 + 0.0.0.0 + 无 auth + SSRF
+- `runtime/api/main.py:34-39` `set_mode(mode) / set_lang(lang)` 写 `os.environ["TAGENT_MODE"]` / `["TAGENT_LANG"]`。2 个并发请求不同 mode → 互踩
+- `runtime/config/settings.py:39-49` `api_host: str = Field(default="0.0.0.0")` 默认全网可达
+- `/run/text` `/run/file` `/run/url` `/status/*` `/report/*` `/catalog` 全部无认证
+- `/run/url` 无 SSRF guard(攻击者填 `http://169.254.169.254/...`)
+- `tempfile.NamedTemporaryFile(delete=False)` 上传文件无清理,`/tmp/` 累积
+
+### P0-NEW-I [致命/runtime] marketplace 安全 gate 是 security theatre
+- `runtime/marketplace/verifier.py:50-67` `gate_sandbox_dry_run` 只做 `ast.parse(text)`,文档承诺 "Docker sandbox" + 同函数 line 66 注释自承 "production: subprocess.run(['docker run --rm --network=none', ...])" 未实现。恶意 `.md` skill 嵌入 prompt 完全通过
+- `runtime/marketplace/verifier.py:70-89` `gate_darwin_score` 是 6-line 关键词长度启发式(base 50 + has `name:` 10 + has `description:` 15 + len>200/500 + has `trigger`/`when to use` 10)。任意 markdown frontmatter ≥200 字符通过
+
+### P0-NEW-J [致命/治理] NOTICE.md 5 处虚假许可证标注
+GitHub License API + 顶层文件实测:
+- `darwin-skill` 上游 `alchaincyf/darwin-skill` `license: null`(仅 README 口头 MIT + badge 404)→ NOTICE 标 MIT 不成立
+- `karpathy-guidelines` 上游 `forrestchang/andrej-karpathy-skills` `license: null` + 仓内 `LICENSE-UPSTREAM` 0 字节 + frontmatter 自报 MIT 无效
+- `karpathy-skills`(同 forrestchang 仓库)同上
+- `build-your-own-x` 上游 `codecrafters-io/build-your-own-x` `license: null` → NOTICE 标 CC0 不成立
+- `gbrain` baseline 已标 `(查源)` TODO 残留 → 实际 MIT 待填实
+
+### P0-NEW-K [致命/法律] 武器化代码零授权墙
+`SECURITY.md` / `README.md` / `NOTICE.md` 全文 grep `authorized/authorization/warning/合规/授权` → **0 处**与渗透/AI 对抗相关。
+- `api_security_scanner.py:96-100` SSRF probe 默认 169.254.169.254 + file:///etc/passwd
+- `ai_adversarial.py:104-116` JAILBREAK_PROMPTS 顶层常量
+- 15-渗透测试 agent `tools: Bash`,tagent.yml `authorized: true` 由用户自填 = **自证授权**
+
+误用即攻击。中国《刑法》§285-§287、美 CFAA、欧盟 NIS2 下,项目方在协助/教唆层面有民事 + 刑事连带风险。
+
+### P0-NEW-L [致命/supply chain] curl-pipe + npm 无 pin + git clone branch 无 hash
+- install.sh:4 README 推 `curl -fsSL .../install.sh | bash -s -- /path`
+- :80 `git clone --depth 1 --branch "$REPO_BRANCH" "$REPO_URL"` — 无 tag pin、无 GPG 签名验证、无 checksum
+- :85 `npm install -g @anthropic-ai/claude-code` — 无版本 pin、无 `--ignore-scripts`、无 integrity 校验
+
+默认分支被攻击 / npm 包链上任一传递依赖被污染 → 所有执行这条一行安装命令的用户都会被远程代码执行(RCE)。配合 P0-Q7 渗透 skill 已 `Bash` 全开 = 最浓缩的攻击面。
+
+---
+
+## 4. P1 重要问题
+
+### P1-Q9 文档数字 **11** 重不一致(baseline 写 "六重")
+13 处文档全部命中 baseline 表格。新增证据:
+- **CHANGELOG 自己算术互锁矛盾**:V1.6 `14 skill → 26` (+7+5 = 26 漏算基线 14),V1.7 `14 → 32` (+18 漏算 karpathy 应是 33)
+- `runtime/INDEX.md:8` 也写 "14 专家定义 + 13 Skill + 49 脚本"
+- `01-测试主管.md` frontmatter:3 + lines 11/342 全文 grep `pentest|automotive|渗透|车载|15|16` = **0 命中**。即使 install.sh 修了,**测试主管也不会调度渗透/车载专家**
+- `runtime/healthcheck/agent_smoke.py:27` `EXPECTED_AGENTS=16` ↔ install.sh 部署 14 → doctor 报 16/16 OK 同时 install 给 14,**两个真相同仓**
+
+### P1-Q10 同名借壳 — 官方仓 0/4 命中 + 3 处伪造派生
+GitHub `anthropics/skills/skills/` 实测 17 个 skill:`algorithmic-art / brand-guidelines / canvas-design / claude-api / doc-coauthoring / docx / frontend-design / internal-comms / mcp-builder / pdf / pptx / skill-creator / slack-gif-creator / theme-factory / web-artifacts-builder / webapp-testing / xlsx`。
+
+- `security-review` — 不在 anthropics/skills,**但是 Claude Code 内置 slash command**(用户键入 `/security-review` 时优先级与本地 skill 未验证)
+- `tdd-workflow` — frontmatter 自称 "派生自 ECC 同名 skill",**ECC 上游不存在此 skill**(实测 `affaan-m/everything-claude-code` 只有 `everything-claude-code-conventions`)
+- `build-your-own-x-explorer` — 无官方同名;codecrafters/build-your-own-x 是数据源,非同名 skill
+- `agent-introspection-debugging` — 同 `tdd-workflow`,**伪造 ECC 派生声明**
+
+### P1-Q11 darwin / karpathy 上游无 LICENSE + 本地履行义务断
+见 P0-NEW-J。基线判 P1,本审升 P0(法律风险)。
+
+### P1-Q13.2 GH Actions
+- `:349` upload-artifact path 含空格 → @v4 glob 按 space 切 3 个目标,第 2 目标变绝对路径必失败
+- `:390` `peaceiris/actions-gh-pages@v3` 已弃用,且用 tag 而非 SHA → 供应链投毒
+- `:382` `simple-elf/allure-report-action@v1.7` Node 16 已下线
+- `:443/452` `${{ github.ref_name }}` 拼 curl JSON body → 分支名含 `"` 即 JSON 注入;分支名含 `[` 即 markdown 注入
+
+### P1-Q13.3 jenkins 重判
+- baseline 对单引号判错(Groovy `"""sh '...'"""` 是正确的:外层 GString 插值 `${WECHAT_WEBHOOK}`、内层 sh single-quote 防二次展开)
+- **真问题**:行 287/352/364 `curl -X POST "${WECHAT_WEBHOOK}"` 经 GString 插值后 webhook URL(含 secret key)写入 sh 命令字符串,**Jenkins console 默认所有有权用户可读** → 凭据泄漏。正确做法 `withCredentials([string(credentialsId: ...)])`
+- `:262` `echo "$BRANCH_NAME"` 自由风格 Pipeline 不注入 BRANCH_NAME → UPDATE_FLAG 永不触发
+- `:330` `jdk: 'JDK17'` 要求全局工具配置存在,模板未给安装步骤
+
+### P1-Q13.5 requirements.txt
+- `:80` `mss>=7.0.1,<10` 唯一范围版本,破坏 "全部 == 锁定" 自承诺
+- **未验证项**:subagent 称 playwright/requests/pypdf/Pillow/paramiko 版本不存在 — 需项目方在真实 pip index + NVD/MITRE 上核 CVE 编号(CVE-2026-25645/28684/25990/44405 当前日期 2026-05-12 部分可能尚未公开)
+
+### P1-Q14 install.sh 自身(修正版)
+- :7 `set -euo pipefail` + :154 `source .venv/Scripts/activate` 若 activate 引用未定义 var → unbound exit
+- :50 trap 是 correct 模式但 **边界条件下可能 rm -rf /**(mktemp 返回空时),需加 `[[ "$TEMPLATE_DIR" == /tmp/* ]] || [[ ... == "$TMPDIR"/* ]]` 守
+- :85 `npm install -g @anthropic-ai/claude-code` 无版本 pin、Linux 非 root 必 EACCES
+- :90-98 mkdir 产物路径与 agents/08-Bug管理.md 默认产物路径**不完全对齐**:装完没有 `discussions/` / `decisions/` / `skill-evolution/` / `测试用例/charters/`(FULL_GUIDE.md:777-800 表里写了 7 个 workspace 子目录,install.sh 只生成其中 4 个)
+- :117 `.env.example → .env` 若 `.env.example` 含真 key 直接落 `.env`
+- :162-163 `PYTHONUTF8=1 PYTHONIOENCODING=utf-8` 设置时机晚于 :76 mktemp + :80 git clone → Windows 非 UTF-8 locale clone CJK 路径可能炸(**需在 :76 之前 export**)
+- :166 `playwright install chromium --with-deps`:macOS 非阻断(fallback echo),Linux curl-pipe 模式下 sudo 提示**无 tty 无限卡死**
+
+### P1-Q15 16 专家半成品(confirmed 全部)
+- 15/16 文件大小 ≈ 5KB vs 前 14 个 7-20KB
+- `requires_layer: [base, security]` / `[base, system]` 非标 frontmatter,被 Claude Code agent loader 忽略
+- `15-渗透测试.md:10` 含绝对路径 `D:/项目文件/_精髓库/pentest-ai-agents.md`
+- `01-测试主管.md` 路由表全无 15/16
+- 代码引用错误 5 处:`07-测试执行.md:179` from env_manager(agent name 非 module)/ `09-报告生成.md:95` namespace 风格不一致 / `11-桌面测试.md:125` Playwright 私有 API / `12-视觉游戏测试.md:142` VIS vs VISUAL / `14-AI模型测试.md:197` claude-3-opus 已退役
+
+### P1-NEW-M `D:/项目文件/_精髓库/` 私有路径泄漏跨 15 个文件
+不止 15-渗透测试.md:10 一处。`grep D:[/\\]项目文件 ...` 命中 15 文件含:
+- `00-项目导航.md`(顶层导航,每用户必读)
+- `runtime/scheduler/__init__.py`、`runtime/subagent/__init__.py`(**代码层泄漏**,import 时即暴露)
+- `runtime/gateway/INDEX.md`、`runtime/backends/INDEX.md`、`runtime/learning_loop/INDEX.md`
+- `CHANGELOG.md`、`NOTICE.md`、`.pre-commit-config.yaml`、`.gitignore`、`_精髓库_apply_policy.example.yaml`
+- `examples/INDEX.md`
+
+每次 LLM 加载这些索引/init 都把作者本机 Windows D 盘路径带进 prompt/日志。runtime/__init__.py 里的硬编码还可能在 import 时影响 sys.path 解析。
+
+### P1-NEW-N selftest 日志内含敏感信息入仓
+`discussions/selftest_1.{11,12,13,14}.0-alpha_*_e2e.log` 含:
+- 本地绝对路径 `D:\项目文件\Test-Agent工作流搭建\runtime\mcp\base.py`
+- `C:\Users\admin\AppData\Roaming\Python\Python314\site-packages\...`
+- 默认 DB URL `postgresql+psycopg://tagent:tagent@localhost:5432/tagent`
+- ANSI 转义 + ProactorEventLoop + Prefect + SQLAlchemy 内部栈
+
+属于 "开发者本机叙事漏到公开仓" 类型。
+
+### P1-NEW-O selftest "100% PASS" 是 stub path 通
+V1.13 / V1.14 selftest log body 中部含 `ModuleNotFoundError: No module named 'psycopg'`,被 try/except 兜住后走 stub。`runtime/mcp/knowledge_base/server.py:114 → storage/db.py:20 → psycopg 缺`。README badge `Self-test 100%` 是写死 SVG,不是 CI 状态。
+
+### P1-Q16 marketplace 不止空壳,结构没立
+- 4 个 lane 目录(agents/skills/hooks/mcp)**根本不存在**(baseline 说 "全空",实际是 "目录都没创建")
+- `.claude-plugin/marketplace.json` 缺
+- `registry.json` = `{"version":"1.0","entries":[]}`
+- `profiles/compliance/` 10 个 YAML 全 `status: skeleton`,无规则执行代码,无对应 utils/check_*.py
+
+### P1-Q17 examples/ docs/ scripts/ discussions/ marketplace/ profiles/ runtime/ 全不被 install.sh 部署
+- examples/ tracked 仅 10(基线 2457 误判,2447 delta 是 `.venv/__pycache__/.pytest_cache` 全被 `.gitignore` 拦)
+- **docs/ 是 orphan tree**:45 个 tracked + 0 个 FULL_GUIDE 引用 + 0 个 00-项目导航 引用
+- `scripts/git-pre-tag.sh` 文档定为 "release gate" 但**无 git hook 无 CI 注册**——honor system
+- runtime/cli + runtime/api 都不被 install.sh 触及 → tagent CLI 永远缺位
+
+### P1-NEW-P FULL_GUIDE.md 25% 是 vaporware
+`FULL_GUIDE.md:540-621` 整段 "按需安装与依赖分层" 描述 6 个 requirements 文件 + 6 选项交互菜单 + `install.sh --add visual,ai`:
+- 仓库**只有 1 个 config/requirements.txt**,无任何分层文件
+- install.sh **无交互菜单**(:100-180 是顺序硬拷贝)
+- install.sh **无 `--add` 参数解析**
+- frontmatter `requires_layer: [base, mobile]` 只在 15/16 两个新 agent 出现,前 14 个不声明
+
+类似 vaporware 段还有:`/bug-submission` 命令实际 skill 名 `zentao-bug-submission`、`darwin-skill 季度同步` 无 schedule job、`AgentChat 协调器 agentchat_recorder` 工程落点不存在、`Skin in the Game / Via Negativa / 熄火协议` 无对应代码。
+
+### P1-NEW-Q HTML Web UI 3/5 README 特性未交付
+`runtime/web/INDEX.md` 宣传 tus-js-client (resumable upload) / Vitest unit tests / SSE / axe-core / refetchInterval polling。实测:
+- tus-js-client **不在** dependencies
+- Vitest 在 devDeps,但**无 `*.test.ts(x)` 文件**,`vitest run` 找到 0 files
+- SSE 实际是 `refetchInterval: 2000` 轮询
+- axe-core ✅(7 个 Playwright e2e)
+- 文件上传是 plain FormData,无 resumable chunking
+
+### P1-Q12 治理残留
+- `NOTICE.md:19` gbrain `(查源)` TODO + 缺 requests/openai/anthropic/bandit/pip-audit/safety 归属
+- `NOTICE.md:39-41` 多端通知(WeChat/Lark/DingTalk/Slack/Teams)只笼统提 "MCP 协议",未声明各服务 ToS
+- `CHANGELOG.md:13-356` 全 [Unreleased],357 行才 [1.0.0],13 个 alpha 全堆,违反 Keep a Changelog 1.1.0
+- `CHANGELOG.md:351` 同样硬编码 14/13/49
+
+---
+
+## 5. P2 瑕疵
+
+### P2-Q18 utils 代码质量(基线 + 新增)
+- `api_retry_util.py:42-43` / `websocket_helper.py:138-140` bare `except Exception: pass`(confirmed)
+- `conftest.py:212-213` 实际有 `logger.warning(...)` 非静默吞噬(**baseline 错**)
+- `datetime.utcnow()` 仅 2 处(不是 "多处"):`api_retry_util.py:39`、`security_scanner.py:98`
+- `dora_metrics.py:31/71/95` `.replace("Z","")` 跨时区偏移
+- `a11y_scanner.py:29` cdnjs / `web_vitals_collector.py:50` unpkg.com 外网 CDN 硬依赖
+- `mobile_driver.py:138-164` _parse_top_cpu / _parse_meminfo 解析脆弱;**:161-164 `PROFILEDATA` 当 FPS 是语义错**(PROFILEDATA 是段标记不是帧),加上 `:115` `dumpsys gfxinfo {package} reset` 与 collect 互斥 → FPS 永远 0
+- `openapi_test_gen.py:50` dict 第一个 key 当 expected_status 顺序不稳
+- `excel_generator.py:60/82` priority 双重隐式 fallback
+- `soak_runner.py:114` `assert` 做生产判断,`python -O` 时 soak 永远通过
+- `contract_test.py:33` `**kwargs + timeout=10` 冲突 → TypeError
+- `websocket_helper.py:67/95-102` **ping 线程才是真泄漏源**(_listen 线程在 except 链 break,ping 线程裸起无 stop 状态)
+- `miniprogram_runner.py:54` 每次新建 ws + close
+- `iot_helper.py:35` `paramiko.AutoAddPolicy` 在 `skip_host_key_check=True` 时启用 → **真 SSH 安全失守在这里,不是 password**
+- `blockchain_test.py:74` `["slither", contract_path, ...]` argv,contract_path = `-rpc-args` 被 slither 当 flag → 应 `["slither", "--", contract_path]`
+- `prd_loader.py:38-39` URL fetch 仅 `re.match(r"^https?://")`,无主机白名单 → SSRF 169.254.169.254 / localhost。**严重度低估,应升 P1**
+
+### P2-SA3-NEW(baseline 漏)
+- **NEW-5 [MED]** `prd_loader.py:181-183` Zip-Slip + Zip-Bomb(zipfile.extractall 无 entry 数/总大小限制)
+- **NEW-6 [MED]** 可预测 /tmp 路径:`chaos_helper.py:50 /tmp/chaos_disk_test`、`db_test_helper.py:131 /tmp/backup.dump` → symlink follow / 共享 CI runner 互覆盖
+- **NEW-7 [MED]** `iot_helper.py:35` AutoAddPolicy(见上)
+- **NEW-8 [LOW]** 全包 49 utils **零 `__all__`**,`from module import *` 会导出所有不以下划线开头的模块级名称(含被导入的模块、内部辅助函数等实现细节);`_priority_cell` 这类下划线前缀名不会被 `*` 导出,但公开 API 边界仍未声明
+- **NEW-9 [LOW]** `compatibility_matrix.py:25-35` 设备数据过期(Pixel 6 Pro / iPhone 15 Pro / Galaxy S23 — 2026-05 主流应是 Pixel 9 / iPhone 17 / Galaxy S25)
+
+### P2-Q19 skill 命名 + Q20 杂项
+- 命名混乱(confirmed):smoke-test / pentest-recon / automotive-hil-loop-test / build-your-own-x-explorer / agent-introspection-debugging 各种 1-4 段杂
+- description 语言混杂:33 中 + 2 英
+- YAML 引号风格不一(新发现)
+- `CONTRIBUTING.md:113-115` + `CHANGELOG.md:351` 同样硬编码 14/13/49 → V1.6+ commit 全失败(除非 --no-verify)
+- `CONTRIBUTING.md:131-184` RACI 矩阵仅 14 列,**作者新加 15/16 时漏改 RACI**
+- `CODE_OF_CONDUCT.md:43` 公开 Issue 走 `code-of-conduct` 标签(行 44 也写了 GitHub Security Advisory 私密通道)→ baseline 描述片面
+- `SECURITY.md:42` "私有源 MD 隔离" 黑话
+- `CHANGELOG.md:353-355` 连续 `---` 渲染出空 hr 段
+- `tagent.yml.example:60` `compliance_profiles: []` 默认空,§21 双签未给参数模板
+- `requirements.txt` 注释 CVE-2026-25645/28684/25990/44405 — 当前日期 2026-05-12 部分编号可能尚未公开发布,需项目方在 NVD/MITRE 核对
+- `FULL_GUIDE.md:1048-1058` 开放问题表 Q1/Q6 整列空白(修订残留)
+
+---
+
+## 6. runtime/ 全新章节(基线最大盲区,本审补完)
+
+baseline 自承 "runtime/ 内部 29 个子模块未逐文件审查"。SA4 + SA5 共审 22 子目录 / 132 个 .py。
+
+### 子模块健康度总表
+
+| 模块 | 文件 | LOC | 测试 | 被 CLI/API 调 | 状态 |
+|---|---|---|---|---|---|
+| **真核心** | | | | | |
+| router | 8 | 505 | yes (test_router) | yes | OK;F23 model ID 错;F24 magic number |
+| registry | 2 | 152 | yes (test_registry) | yes | OK;F27 模块全局 _cache 无锁 |
+| orchestrator | 14 | 961 | smoke_e2e 间接 | yes | **F8 + F9 P0**:_upstream_outputs race + RunnerResult.ok 永真 |
+| storage | 9 | 538 | smoke 间接 | yes | F25 migration downgrade SQLite 炸;F26 minio no TLS |
+| cli | 2 | 540 | **无** | self | F16 模块加载即 Kernel() 副作用;F17 init bug-tracker 矩阵漂;F18 demo 命令的 workaround 证明 F16 是已知 smell |
+| api | 4 | 389 | **无** | self | **P0-NEW-H**:F2/F3/F4/F5/F35 — 无 auth + 0.0.0.0 + 进程级 env + 无 SSRF + tempfile 漏 |
+| mcp (6 servers) | 16 | 1200 | **无** | self | **F1 REFUTES baseline Q13.1**:6 件套全在;**F7 P0 path traversal evidence_vault**;F8 defect_tracker ADAPTERS={} 永空;F9-F10 knowledge_base pgvector 绑定 + _embed_stub 不归一 |
+| subagent | 4 | 120 | **无** | self | F28 32 workers 不视 CPU;F29 fanout 总 budget 而非 per-task |
+| config | 3 | 170 | **无** | self | **F21 P0** api_host=0.0.0.0 + hardcoded minio creds;F22 safety @lru_cache 隐藏文件变更 |
+| **悬空层** | | | | | |
+| scheduler | 4 | 361 | **无** | **0** | F7 croniter 不在 deps;F14 injection_scan 8 条正则太弱 |
+| backends | 9 | 580 | **无** | **0** | **F1 P0** REGISTRY 永空;**F3+F4 P0** SSH MITM + cmd 注入;F5 vercel_sandbox API 假;F6 local shell 注入;F11 singularity sync_in no-op;F12 modal sync 阻塞 async |
+| learning_loop | 4 | 258 | **无** | **0** | F19 curator 是脚手架无评分/衰减逻辑;F20 user_model.add_fact 无锁;F21 session_search 重复 init_db |
+| gateway | 12 | 428 | **无** | **0** | F15 get_platform 未导出;**F16** 6 平台 webhook URL 无白名单 → SSRF;F17 email send 强 reconfigure;F18 session bind 无锁 |
+| **其他** | | | | | |
+| init | 4 | 470 | **无** | self | matrix.yaml + 3 templates OK;preset 与 cli demo 一致性问题 |
+| observability | 3 | 93 | **无** | self | **F14**:`init_tracing()` 仅 orchestrator 调,**api/cli/FastAPI 全未挂** |
+| healthcheck | 3 | 200 | **无** | self | **F15** `EXPECTED_AGENTS=16` 与 install.sh 14 冲突 |
+| tutor | 8 | 740 | **无** | self | mode/lang 进程级 env 全局态(同 P0-NEW-H) |
+| essence_watcher | 5 | 360 | **无** | partial | F30 silent skip on missing `gh` CLI |
+| exporters | 5 | 310 | **无** | self | F32 xmind 缺 thumbnails/styles(XMind 2023+ 显示 "untitled") |
+| marketplace | 4 | 320 | **无** | self | **P0-NEW-I**:F11/F12 4 关 gate 是 security theatre |
+| tests | 4 + conftest | 270 | self | — | **F25 P0**:~11 个 test,5-6 个 meaningful,0 个 cover MCP/api/cli/subagent/init/tutor/healthcheck/marketplace/exporters/observability/config |
+| web | 4 + 1 e2e | ~270 ts | 7 playwright | self | **F26**:3/5 INDEX 特性未交付(tus/Vitest/SSE);F27 BASE 硬编码 /api 无 env 覆盖 |
+| **top-level** | 4 | 182 | — | — | **F19** Dockerfile 跑 root 无 HEALTHCHECK 无 .dockerignore;**F20** compose 硬编码 tagent/tagent 凭据 + 5432/9000/4200 全暴露 + Grafana anonymous Admin;F23 deps 无上界(Prefect 2.20+ 会解到 Prefect 3 API 不兼容)|
+
+### 4 个悬空模块的真相
+
+**4 个悬空层(scheduler + backends + learning_loop + gateway = 29 文件 / 1627 LOC)在 `runtime/cli/main.py` `runtime/api/` 全文 grep 0 命中**。唯一外部使用是 `runtime/marketplace/verifier.py:40` 引用 `scheduler.injection_scan` 一个函数。
+
+`tagent.yml.example` 里 backends/scheduler/learning_loop/gateway 配置块用户填了也没人读。hermes 蓝图("调度器 + 多平台 + 7 后端 + 学习闭环")整层是 dead inventory。和 baseline Q3 是同一个根因。
+
+### 架构 smell
+
+1. **模块全局可变状态无锁**:`_upstream_outputs`(orchestrator)、`_cache`(registry)、`_engine`(storage)、`REGISTRY`(backends)— 整层假设单线程,Prefect ConcurrentTaskRunner 一开并发就同时炸
+2. **register-decorator 注册模式**依赖 import-time 副作用,但 `backends/__init__.py` 没 import 子模块 → REGISTRY 永空。`gateway/platforms/__init__.py` 写对了,backends 没复制
+3. **shell 拼接是默认模式**:6 个 backend 都用 `sh -lc cmd` 拼接,LLM 链路任一上游脏数据 = RCE
+4. **教学 vs 生产模糊**:scheduler `_default_runner` import Kernel;scheduler 跑起来要 api 子包整体 import,启动顺序耦合
+5. **tagent CLI 模块级 Kernel()**:F16 `_kernel = Kernel()` 模块加载即副作用 → `tagent --help` 也跑 logging init + 读 .env
+
+### 测试覆盖
+- router/registry ~70%;orchestrator ~30% (smoke);其他 18 个子目录 = 0
+- 整 runtime 测试 ~11 个,meaningful ≈ 5-6 个,0 个 cover F2-F35 中任何 bug
+- "Production runtime" 营销话术 vs 真覆盖率严重落差
+
+---
+
+## 7. 治理 / 法律线(SA7 全核)
+
+### NOTICE.md 全表实测
+| NOTICE.md 行 | 标 | gh api 实测 | 判 |
+|---|---|---|---|
+| 9 darwin-skill | MIT | `license: null` | ❌ |
+| 10 karpathy-guidelines | MIT | `license: null` | ❌ |
+| 18 hermes-agent | MIT | MIT | ✓ |
+| 19 gbrain | (查源) | 实际 MIT | TODO |
+| 20 karpathy-skills | MIT | `null`(同 row 10 同源)| ❌ |
+| 21 everything-claude-code | MIT | MIT | ✓ |
+| 22 pentagi + shannon | MIT + AGPL-3.0 | 同 | ✓ |
+| 23 build-your-own-x | CC0-1.0 | `null` | ❌ |
+
+**5 处虚假标注(4 MIT/CC0 + 1 TODO)**。baseline Q12.1 只点 gbrain,实际是 5 处。
+
+### 武器化代码 0 处授权墙
+逐文件 grep `authorized|authorization|warning|合规|授权`:
+- `SECURITY.md`:0 处涉及 pentest
+- `README.md` / `README.zh-CN.md`:仅 happy-path 提 `Security researcher → pentest-coordinator`
+- `NOTICE.md`:0 处
+- `api_security_scanner.py`:0 处
+- `ai_adversarial.py`:0 处
+
+`tagent.yml pentest.authorized: true` 由用户自填 = **自证授权**。中国《刑法》§285-§287 / 美 CFAA / 欧盟 NIS2 下,项目方在协助/教唆层面可能承担连带责任。
+
+### AGPL 兼容
+`_精髓库/pentest-ai-agents.md` 萃取自 shannon (AGPL-3.0) + pentagi (MIT)。本文档自承 "仅萃取思想不复制代码" + 有完整致谢,但**缺一份显式 idea-expression 抗辩段**(如:"本文档为对 shannon 架构的观察,不复用源代码/字符串/API 签名/数据结构;如发现疑似复用请提 issue 立即移除")。AGPL 实务中举证成本高,律师函风险存。
+
+### 推荐法律首动作
+1. **(48 小时)** NOTICE.md 5 处虚假标注全改 "No license stated by upstream; included on best-effort attribution, will remove on takedown"
+2. **(48 小时)** SECURITY.md 增 "武器化代码用户责任" 段
+3. **(1 周)** darwin-skill 子目录补 LICENSE 文件履行 MIT "copyright notice shall be included";上游开 issue 求补 LICENSE
+4. **(1 周)** 删 3 处伪造 ECC 派生声明(tdd-workflow / security-review / agent-introspection-debugging frontmatter)
+5. **(2 周)** CONTRIBUTING.md 补 DCO 段(防止贡献来源与权属争议)
+6. **(2 周)** `_精髓库/pentest-ai-agents.md` 补 AGPL 抗辩段
+
+---
+
+## 8. 真机 install.sh reproduce 结果(Windows MSYS)
+
+### 部署 vs 宣传 对照表(真测量)
+
+| 类别 | 宣传 | 仓库源 | install.sh 硬编码 | 真跑部署 | 缺 |
+|---|---|---|---|---|---|
+| agents .md | 16 | 16+README | 14 | 14 | 2 (15/16) |
+| skill .md 顶层 | 32 | 32+README | 13 | 13 | 19 |
+| skill 子目录 | 2 | 2 | 0 | 0 | 2(无注释说明是有意) |
+| utils .py | 49 | 48+__init__ | 48+__init__ | 49 | 0 |
+| CI workflows | 2 | 2 | 2 | 2 | 0 |
+| compliance profiles | 10 | 10 skeleton | 0 部署 | 0 functional | 10(全 skeleton)|
+
+### Top 5 平台特定失败模式
+| # | 失败 | 平台 | 严重度 |
+|---|---|---|---|
+| 1 | `npm install -g` EACCES 无 sudo | Linux | HIGH 阻断 |
+| 2 | `playwright --with-deps` sudo 提示在 curl-pipe 模式下无 tty 无限卡 | Linux (curl-pipe) | HIGH 静默卡 |
+| 3 | banner V1.0.0 vs 真 1.14.0-alpha | All | MEDIUM trust |
+| 4 | install.sh 漏 2 agent + 19 skill | All | HIGH 能力缺 |
+| 5 | 二次跑覆盖用户自定 conftest/pytest/utils(idempotency 缺)| All | MEDIUM 数据丢 |
+
+### install.sh 好的部分
+- Windows python3 MS Store stub 检测正确(`for cand in python3 python py` + 退出码 49 兼容)
+- `PYTHONUTF8=1` + `PYTHONIOENCODING=utf-8` 防 Windows GBK 陷阱
+- PRESERVE_FILES 备份机制(限于 .env 等 4 个文件)
+
+---
+
+## 9. 修复路径 V2
+
+### 决策点 A(先拍这个,所有 P0 修都从这里分叉)
+
+baseline 已点 install.sh 装什么的 A/B/C 三选项。**本审增加一个 sub-decision**:
+
+**决策 A0:先撤回话术 vs 先修代码?**
+README 头条 "production runtime / 16 expert agents · Self-test 100%" 与 HANDOFF 自承 "5 真 + 5 SCRIPT + 6 no-op" 共存在仓——这个**信任沟无法用代码修复**。无论选 A/B/C,必须**先**改 README 撤回过度承诺,再谈技术修。否则用户读 README 进来发现 6 no-op,每个修好的 P0 都是 "为什么这里也不对" 的二次打击。
+
+**第 1 步必须完成(无论选 A/B/C)**:
+- README 头条改为 "5 LLM-driven expert agents + 5 script-backed + 6 stub-only (V1.x 实现中)" 或更保守表述
+- README badge "Self-test 100%" 改为 CI 真实状态徽章或直接删
+- "Production runtime" 改 "Alpha runtime (1.14.0-alpha)"
+- HANDOFF 文件不再放公开仓 主分支,改 `discussions/internal/` + `.gitignore`
+
+### 决策点 B:install.sh 装什么 (baseline Q3 A/B/C 选项)
+
+**推荐 A 选项** 理由:B 选项(pip install -e runtime/)会把 4 个悬空层一并暴露给用户,CLI 调不到反而更困惑;C 选项要砍 README 50% 内容,与决策 A0 工作量重复但收益小。
+
+A 选项落地:
+- install.sh 只装 A 层模板包(agent / skill / utils / 配置 / CI)
+- runtime/ 出独立 tagent-cli(PyPI 包),README 写 "若需 runtime 编排能力 → `pip install test-agent-runtime`"
+- README/FULL_GUIDE 拆为 2 个产品线叙事:CHARTER(永久宪章)+ ARCHITECTURE-V1(V1.0 模板线)+ ARCHITECTURE-RUNTIME(V1.14 runtime 线,明示 alpha)
+
+### 修复轮次(选 A 后)
+
+**第 1 轮(半天 — 信任修复)**:
+- 决策 A0 + B 拍板
+- README 撤回过度承诺(5 处话术)
+- HANDOFF 移出主仓
+- VERSION / install.sh banner / FULL_GUIDE / 00-导航 / 02-/03-/04-README / CONTRIBUTING / install.sh:101/106/120 数字统一到 16/33+2/49
+- 01-测试主管路由表加 15/16 expert 调度
+- NOTICE.md 5 处虚假标注改 "No license stated by upstream"
+
+**第 2 轮(2-3 天 — 发版阻断)**:
+- install.sh 列表替为 glob(`find ... -maxdepth 1 -name '*.md'` + 子目录 `rsync -a`)
+- install.sh 加 OSTYPE 分支处理 npm sudo + playwright --with-deps
+- install.sh 加 sandbox 守 `[[ "$TEMPLATE_DIR" == /tmp/* ]]` 防 rm -rf /
+- Q6 4 个代码 bug 修
+- Q8 9 项安全 + SA3 NEW-1/2/3/4 高危
+- runtime backends/__init__.py REGISTRY 注册修
+- runtime orchestrator `_upstream_outputs` 改 ContextVar
+- runtime orchestrator `RunnerResult.ok` 接 fallback 标 degraded
+- runtime backends SSH known_hosts + shell quote
+- runtime router LLM model ID 修对
+- runtime mcp evidence_vault path traversal 修
+- runtime api 加 auth + bind 127.0.0.1 + SSRF guard
+- runtime marketplace verifier 改名 `gate_syntax_check` + 删 "sandbox" 营销
+- SECURITY.md 加武器化代码用户责任段
+
+**第 3 轮(1 周+ — 质量)**:
+- runtime tests 加 FastAPI TestClient 全 endpoint + MCP per-server in-process test + CLI demo E2E
+- runtime web 实现 INDEX 承诺的 tus/SSE/Vitest,或下架承诺
+- 49 utils 加 `__all__`
+- chaos_helper / iot_helper / desktop_driver / prd_loader 高危项
+- FULL_GUIDE Split 为 CHARTER + ARCHITECTURE + USER_GUIDE
+- Dockerfile 加 non-root + HEALTHCHECK + .dockerignore;compose 改 .env 化 secret
+- marketplace 4 lane 真接入官方 plugin marketplace 协议或撤掉 "对标官方" 话术
+- _精髓库 私有路径泄漏跨 15 文件清理
+
+---
+
+## 10. 审查方法学注记 / 已知盲区
+
+**本审已覆盖**:
+- install.sh 全文 184 行 + 真机 reproduce(Windows MSYS)
+- 16 个 agent .md 全文(agents/)
+- 33 个 skill .md + 2 个子目录(skills/)
+- 49 utils .py(utils/)
+- runtime/ 22 子目录 / 132 个 .py 逐文件
+- FULL_GUIDE.md (1252 行) / 00-项目导航.md (416 行) / README*.md / CHANGELOG.md (369 行) / CONTRIBUTING.md (244 行) / Test-Agent工作流搭建.md / NOTICE.md / SECURITY.md / CODE_OF_CONDUCT.md / LICENSE 全读
+- discussions/ 6 文件全读(含 HANDOFF + 5 selftest log)
+- examples/ docs/ scripts/ marketplace/ profiles/ workspace/ config/ ci/ 全部
+- 8 个上游 GitHub 仓 License API 实测
+
+**已知盲区(本审未覆盖)**:
+- Test-Agent工作流搭建.md 2490 行做了首末段 + spot-check,未逐行
+- requirements.txt CVE-2026 编号未在 NVD/MITRE 真测
+- 同级目录 `D:\项目文件\TG云盘\` 是另一独立项目(含 backend/frontend/worker/nginx + docker-compose),与 Test-Agent 关系未审;`D:\项目文件\W4_DRAFTS\` 含 show-hn-draft.md / blog 草稿,可能与本仓发布物料相关,未审
+- runtime/tests 实际跑通率(本审仅静态读,未 `pytest runtime/tests/`)
+- runtime/ pip install -e 真跑可行性(基于 pyproject.toml entry_points 推断 OK,未实测)
+
+**baseline 已知误判(本审订正)**:
+- Q4 test-project/ 不在工作树
+- Q5 workspace/ 0 tracked
+- Q10 撞 anthropics/skills 官方 0/4,真风险是撞 Claude Code 内置 slash + 3 处伪造 ECC 派生
+- Q13.1 runtime/mcp/test_orchestrator/server.py 真实存在 + 6 件套全齐
+- Q13.3 Jenkins triple-quote 是正确写法;真问题是 webhook URL console 明文
+- Q14:50 trap rm 是 correct 模式(mktemp 父目录),但边界条件 mktemp 返空仍可炸
+- Q18.1 conftest:212-213 实际有 logger.warning 非静默
+
+---
+
+**全量问题清单完毕。**
+
+总计:P0 致命 **21 项**(基线 9 + 新增 12)/ P1 重要 **17 项**(基线 8 + 新增 9)/ P2 瑕疵 **20+ 项**(基线 11 + 新增 11)+ runtime/ 70+ 项 finding 跨 22 子目录 + 法律线 5 处虚假标注 + 1 处 0 字节 LICENSE-UPSTREAM。
+
+baseline 整体方向准、5 处需要订正、20+ 处严重度被低估。承诺-交付裂口比 baseline 描述更深、更广、更早——README 头条第一条命令就跑不通是最致命的发现。
+
+---
diff --git a/docs/theory/01-tools/pytest.en.md b/docs/theory/01-tools/pytest.en.md
index d636618..6f498e0 100644
--- a/docs/theory/01-tools/pytest.en.md
+++ b/docs/theory/01-tools/pytest.en.md
@@ -42,7 +42,7 @@ reading_en:
# pytest
-De facto Python testing standard. This project's `runtime/` uses pytest end-to-end; `04-配置文件/pytest.ini` is preconfigured.
+De facto Python testing standard. This project's `runtime/` uses pytest end-to-end; `config/pytest.ini` is preconfigured.
## Invocation in this project
- Any `runtime/tests/test_*.py` → `pytest runtime/tests/`
diff --git a/docs/theory/01-tools/pytest.zh.md b/docs/theory/01-tools/pytest.zh.md
index ae7527d..92fa9ff 100644
--- a/docs/theory/01-tools/pytest.zh.md
+++ b/docs/theory/01-tools/pytest.zh.md
@@ -43,7 +43,7 @@ reading_en:
# pytest
-Python 测试事实标准。本项目 `runtime/` 全栈 pytest;`04-配置文件/pytest.ini` 已配齐。
+Python 测试事实标准。本项目 `runtime/` 全栈 pytest;`config/pytest.ini` 已配齐。
## 在本项目调用
- 任何 `runtime/tests/test_*.py` 文件 → `pytest runtime/tests/`
diff --git a/docs/theory/05-methods/equivalence-partitioning.zh.md b/docs/theory/05-methods/equivalence-partitioning.zh.md
index f10fc88..2ddd0da 100644
--- a/docs/theory/05-methods/equivalence-partitioning.zh.md
+++ b/docs/theory/05-methods/equivalence-partitioning.zh.md
@@ -47,7 +47,7 @@ reading_en:
5. 配合**边界值分析**测临界点
## Test-Agent 用法
-- `testcase-designer` 专家(02-专家定义/03-用例设计.md)默认套此法
+- `testcase-designer` 专家(agents/03-用例设计.md)默认套此法
- Excel 输出 4 Sheet 含等价类表
## 为什么这么做?
diff --git a/docs/theory/07-platforms/desktop-testing-windows.zh.md b/docs/theory/07-platforms/desktop-testing-windows.zh.md
index f3cc90a..ea2cb58 100644
--- a/docs/theory/07-platforms/desktop-testing-windows.zh.md
+++ b/docs/theory/07-platforms/desktop-testing-windows.zh.md
@@ -47,7 +47,7 @@ reading_en:
| **Visual 层** | PyAutoGUI + OpenCV / Airtest OCR | 最脆,易碎 | 无 UIA 树时兜底(游戏/Canvas) |
## Test-Agent 路由逻辑
-被测物 PE32 → `desktop-tester` 专家(02-专家定义/11-桌面测试.md)→ `utils/desktop_driver.py` 调用 pywinauto。
+被测物 PE32 → `desktop-tester` 专家(agents/11-桌面测试.md)→ `utils/desktop_driver.py` 调用 pywinauto。
## 为什么 Agent 选 pywinauto 而非 Playwright?
- Playwright **只支持 Web/Electron**,不能直接驱动 Win32 进程
diff --git a/docs/theory/13-build-your-own/byox-shell.zh.md b/docs/theory/13-build-your-own/byox-shell.zh.md
index 5d1e482..4ad2123 100644
--- a/docs/theory/13-build-your-own/byox-shell.zh.md
+++ b/docs/theory/13-build-your-own/byox-shell.zh.md
@@ -23,7 +23,7 @@ reading_en: ["https://brennan.io/2015/01/16/write-a-shell-in-c/"]
# 对测试工作
-- **subprocess 测试**:`runtime/orchestrator/adapters/scripts.py` 用 subprocess 包 49 utils;懂 shell = 懂边界
+- **subprocess 测试**:`runtime/orchestrator/adapters/scripts.py` 用 subprocess 包 73 utils;懂 shell = 懂边界
- **信号**:测试中 SIGTERM/SIGINT 优雅退出
- **pipe**:测试命令链(`cmd1 | cmd2`)各自 stderr 独立
- **后台 / nohup**:测试持久化进程 / Daemon
diff --git a/docs/tutorial/TUTORIAL.md b/docs/tutorial/TUTORIAL.md
index 8e28454..622b1c2 100644
--- a/docs/tutorial/TUTORIAL.md
+++ b/docs/tutorial/TUTORIAL.md
@@ -48,8 +48,8 @@ tagent doctor --agents
验证 16 expert agents 全部就绪。输出类似:
```
-✓ 02-专家定义/01-测试主管.md test-lead
-✓ 02-专家定义/02-需求分析.md requirements-analyst
+✓ agents/01-测试主管.md test-lead
+✓ agents/02-需求分析.md requirements-analyst
... (16 agents total)
```
diff --git a/examples/INDEX.md b/examples/INDEX.md
index 9966776..4f25664 100644
--- a/examples/INDEX.md
+++ b/examples/INDEX.md
@@ -1,4 +1,4 @@
-# examples/ 索引(V1.10.0)
+# examples/ 索引(V1.42.0)
> 端到端可跑示例 · 让新人 5 分钟内看到 Test-Agent 实际工作。
diff --git a/examples/web-demo/README.md b/examples/web-demo/README.md
index 2eddd66..e56d8e5 100644
--- a/examples/web-demo/README.md
+++ b/examples/web-demo/README.md
@@ -71,7 +71,7 @@ examples/web-demo/
| 完整工作流 | 本 demo |
|-----------|---------|
-| 16 Agent + 32 Skill + 49 utils | 仅 pytest + playwright |
+| 16 Agent + 32 Skill + 78 utils | 仅 pytest + playwright |
| `.env` 配置 8 必填 | 不需 `.env` |
| Allure / JMeter / BugTracker 集成 | 不集成 |
| 冒烟 + 回归 + 性能门禁 | 仅 1 冒烟用例 |
diff --git a/examples/web-demo/conftest.py b/examples/web-demo/conftest.py
index e7d66fe..087fa00 100644
--- a/examples/web-demo/conftest.py
+++ b/examples/web-demo/conftest.py
@@ -2,7 +2,7 @@
"""
Web Demo 最小 conftest.py
仅含 Playwright browser/page fixture,演示 Page Object 模式接入。
-完整 Test-Agent 工作流 conftest 见 04-配置文件/conftest.py(含 EnvConfig / api_client / cleanup_tracker / 失败截图 hook 等)。
+完整 Test-Agent 工作流 conftest 见 config/conftest.py(含 EnvConfig / api_client / cleanup_tracker / 失败截图 hook 等)。
"""
import os
import pytest
diff --git a/examples/web-demo/tests/test_smoke.py b/examples/web-demo/tests/test_smoke.py
index 99b4312..ac34415 100644
--- a/examples/web-demo/tests/test_smoke.py
+++ b/examples/web-demo/tests/test_smoke.py
@@ -2,7 +2,7 @@
"""
P0 冒烟用例最小示例。
演示:pytest fixture 注入 + Page Object 调用 + 断言。
-完整 Test-Agent 工作流的 P0 冒烟门禁见 03-技能定义/smoke-test.md(≥95% 通过率)。
+完整 Test-Agent 工作流的 P0 冒烟门禁见 skills/smoke-test.md(≥95% 通过率)。
"""
import sys
from pathlib import Path
diff --git a/install.sh b/install.sh
index bbde32e..c9ea438 100644
--- a/install.sh
+++ b/install.sh
@@ -2,7 +2,7 @@
# Test-Agent 工作流一键部署脚本
#
# 安全提示:curl | bash 存在供应链风险。生产环境建议先 clone 仓库再本地执行:
-# git clone --depth 1 --branch v1.32.5 https://github.com/Wool-xing/Test-Agent.git
+# git clone --depth 1 --branch v1.42.0 https://github.com/Wool-xing/Test-Agent.git
# cd Test-Agent && bash install.sh /path/to/your-test-project
#
# 用法(远程一行,方便快速试用):
@@ -17,7 +17,7 @@ REPO_URL="${TEST_AGENT_REPO_URL:-https://github.com/Wool-xing/Test-Agent.git}"
REPO_BRANCH="${TEST_AGENT_REPO_BRANCH:-main}"
echo "=========================================="
-echo " Test-Agent 工作流一键部署 V1.32.5"
+echo " Test-Agent 工作流一键部署 V1.42.0"
echo " 仓库: $REPO_URL ($REPO_BRANCH)"
echo " 项目目录: $PROJECT_ROOT"
echo "=========================================="
@@ -28,7 +28,7 @@ PRESERVE_FILES=(".env" "workspace/测试数据/test_data.json"
"workspace/regression_modules.yaml")
BACKUP_DIR=""
if [[ -d "$PROJECT_ROOT" ]]; then
- BACKUP_DIR="$(mktemp -d -t test-agent-backup-XXXXXX)"
+ BACKUP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/test-agent-backup-XXXXXXXX")"
echo "→ 检测到已有项目,备份用户数据到 $BACKUP_DIR"
for f in "${PRESERVE_FILES[@]}"; do
if [[ -f "$PROJECT_ROOT/$f" ]]; then
@@ -52,7 +52,7 @@ restore_user_data() {
rm -rf "$BACKUP_DIR"
fi
}
-trap 'restore_user_data; [[ -n "${TEMPLATE_DIR:-}" ]] && rm -rf "$(dirname "$TEMPLATE_DIR")" 2>/dev/null || true' EXIT
+trap 'restore_user_data; [[ -n "${TEMPLATE_DIR:-}" ]] && rm -rf "$(dirname "$TEMPLATE_DIR")" 2>/dev/null' EXIT
# ===== 1. 检查工具 =====
need() { command -v "$1" >/dev/null 2>&1 || { echo "❌ 缺少 $1"; exit 1; }; }
@@ -113,18 +113,18 @@ mkdir -p "$PROJECT_ROOT"/workspace/执行日志/{allure-results,jmeter-results,j
# ===== 5. 拷贝 Agent / Skill 定义 =====
echo "→ 拷贝 Agent 定义..."
# Glob 全部 [0-9]*.md (业务 agent),自动覆盖未来新增
-find "$TEMPLATE_DIR/02-专家定义" -maxdepth 1 -name '[0-9]*.md' -exec cp {} "$PROJECT_ROOT/.claude/agents/" \;
+find "$TEMPLATE_DIR/agents" -maxdepth 1 -name '[0-9]*.md' -exec cp {} "$PROJECT_ROOT/.claude/agents/" \;
agent_count=$(ls "$PROJECT_ROOT/.claude/agents/"[0-9]*.md 2>/dev/null | wc -l)
echo " 已部署 $agent_count 个 Agent"
echo "→ 拷贝 Skill 定义..."
# Glob 顶层业务 skill (排除 README)
-find "$TEMPLATE_DIR/03-技能定义" -maxdepth 1 -name '*.md' ! -name 'README.md' -exec cp {} "$PROJECT_ROOT/.claude/skills/" \;
+find "$TEMPLATE_DIR/skills" -maxdepth 1 -name '*.md' ! -name 'README.md' -exec cp {} "$PROJECT_ROOT/.claude/skills/" \;
# 上游派生子目录 (darwin / karpathy-guidelines / nuwa)
# 注: 用 "${subdir%/}" 去 trailing / — macOS BSD cp 上 `cp -r darwin-skill/ dest/`
# 会展开内容到 dest/, 而非把 darwin-skill 整目录拷过去 (与 GNU cp 行为不同)。
# Linux GNU cp 上两种语法等价, 但 macOS 必须去 / 才能保证子目录结构。
-for subdir in "$TEMPLATE_DIR/03-技能定义"/*/; do
+for subdir in "$TEMPLATE_DIR/skills"/*/; do
[[ -d "$subdir" ]] && cp -r "${subdir%/}" "$PROJECT_ROOT/.claude/skills/"
done
skill_md_count=$(ls "$PROJECT_ROOT/.claude/skills/"*.md 2>/dev/null | wc -l)
@@ -133,37 +133,28 @@ echo " 已部署 $skill_md_count 个业务 Skill + $skill_dir_count 个元 Skil
# ===== 6. 配置文件 =====
echo "→ 拷贝配置文件..."
-cp "$TEMPLATE_DIR/04-配置文件/conftest.py" "$PROJECT_ROOT/"
-cp "$TEMPLATE_DIR/04-配置文件/pytest.ini" "$PROJECT_ROOT/"
-cp "$TEMPLATE_DIR/04-配置文件/.mcp.json" "$PROJECT_ROOT/"
-cp "$TEMPLATE_DIR/04-配置文件/requirements.txt" "$PROJECT_ROOT/"
-[[ -f "$PROJECT_ROOT/.env" ]] || cp "$TEMPLATE_DIR/04-配置文件/.env.example" "$PROJECT_ROOT/.env"
-
-# ===== 7. utils(49 个 .py + __init__)=====
-echo "→ 拷贝 utils(49 个)..."
-for f in __init__.py api_retry_util.py data_factory.py data_masking.py \
- excel_generator.py flaky_detector.py generate_report.py \
- jmeter_csv_exporter.py jmeter_result_parser.py \
- regression_scope.py zentao_bug_manager.py ci_quality_gate.py \
- mobile_driver.py miniprogram_runner.py desktop_driver.py \
- visual_helper.py iot_helper.py media_validator.py \
- tracing_validator.py mq_helper.py ai_validator.py \
- prd_loader.py websocket_helper.py protocol_helper.py \
- security_scanner.py network_throttle.py chaos_helper.py \
- soak_runner.py ux_metrics.py compatibility_matrix.py \
- state_machine_tester.py pairwise_generator.py bdd_runner.py \
- web_vitals_collector.py api_security_scanner.py fuzzer.py \
- db_test_helper.py contract_test.py openapi_test_gen.py \
- push_test.py a11y_scanner.py i18n_checker.py \
- mutation_runner.py dora_metrics.py blockchain_test.py ai_adversarial.py \
- slo_validator.py email_sender.py suite_minimizer.py; do
- cp "$TEMPLATE_DIR/05-代码示例/${f}" "$PROJECT_ROOT/utils/"
-done
+cp "$TEMPLATE_DIR/config/conftest.py" "$PROJECT_ROOT/"
+cp "$TEMPLATE_DIR/config/pytest.ini" "$PROJECT_ROOT/"
+cp "$TEMPLATE_DIR/config/.mcp.json" "$PROJECT_ROOT/"
+cp "$TEMPLATE_DIR/config/requirements.txt" "$PROJECT_ROOT/"
+[[ -f "$PROJECT_ROOT/.env" ]] || cp "$TEMPLATE_DIR/config/.env.example" "$PROJECT_ROOT/.env"
+
+# ===== 7. utils(自动扫描全部 .py 文件)=====
+echo "→ 拷贝 utils..."
+_count=0
+while IFS= read -r -d '' f; do
+ rel="${f#$TEMPLATE_DIR/utils/}"
+ dest="$PROJECT_ROOT/utils/$rel"
+ mkdir -p "$(dirname "$dest")"
+ cp "$f" "$dest"
+ _count=$((_count + 1))
+done < <(find "$TEMPLATE_DIR/utils" -name "*.py" -print0)
+echo " ✓ $_count 个 .py 文件已拷贝"
# ===== 8. CI/CD =====
echo "→ 拷贝 CI/CD..."
-cp "$TEMPLATE_DIR/06-CICD集成/github-actions-test.yml" "$PROJECT_ROOT/.github/workflows/test.yml"
-cp "$TEMPLATE_DIR/06-CICD集成/jenkins-pipeline.groovy" "$PROJECT_ROOT/Jenkinsfile"
+cp "$TEMPLATE_DIR/ci/github-actions-test.yml" "$PROJECT_ROOT/.github/workflows/test.yml"
+cp "$TEMPLATE_DIR/ci/jenkins-pipeline.groovy" "$PROJECT_ROOT/Jenkinsfile"
# ===== 8.5 顶层法律 / 治理 / 路线图文档 =====
echo "→ 拷贝法律 / 治理 / 路线图文档..."
@@ -196,8 +187,13 @@ export PYTHONIOENCODING=utf-8
# (B uv 待 upstream 修: 实测 uv + Tsinghua 组合协同有 bug, 未达预期 10x)
if [[ -z "${PIP_INDEX_URL:-}" ]]; then
is_cn=0
- case "${LANG:-}" in zh*|*CN*|*GB*) is_cn=1 ;; esac
- [[ "$(date +%z 2>/dev/null)" == "+0800" ]] && is_cn=1
+ # 允许显式跳过 CN 镜像: TEST_AGENT_NO_CN_MIRROR=1 ./install.sh ...
+ if [[ "${TEST_AGENT_NO_CN_MIRROR:-0}" == "1" ]]; then
+ is_cn=0
+ else
+ case "${LANG:-}" in zh*|*CN*|*GB*) is_cn=1 ;; esac
+ [[ "$(date +%z 2>/dev/null)" == "+0800" ]] && is_cn=1
+ fi
if [[ $is_cn -eq 1 ]]; then
echo "→ 检测到 CN 环境, 用清华 PyPI 镜像加速 (export PIP_INDEX_URL=... 可覆盖)"
export PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
diff --git a/mobile/package.json b/mobile/package.json
index 2e06088..48c5167 100644
--- a/mobile/package.json
+++ b/mobile/package.json
@@ -1,6 +1,6 @@
{
"name": "test-agent-mobile",
- "version": "1.32.0",
+ "version": "1.42.0",
"description": "Test-Agent Mobile — AI Testing Framework for iOS & Android",
"scripts": {
"build": "cd ../runtime/web && npm run build",
diff --git a/requirements/README.md b/requirements/README.md
new file mode 100644
index 0000000..e99f79b
--- /dev/null
+++ b/requirements/README.md
@@ -0,0 +1,24 @@
+# 按需安装 — 依赖分层
+
+> Phase 2 已交付。分层文件已就绪:1 个 base 基础层 + 6 个按需层(共 7 个文件,见下表)。
+
+## 六层结构
+
+| 层 | 文件 | 触发条件 | 安装命令 |
+|----|------|---------|---------|
+| base | `requirements/base.txt` | 永远装 | `pip install -r requirements/base.txt` |
+| mobile | `requirements/mobile.txt` | 选择 mobile | `pip install -r requirements/mobile.txt` |
+| desktop | `requirements/desktop.txt` | 选择 desktop | `pip install -r requirements/desktop.txt` |
+| visual | `requirements/visual.txt` | 选择 visual | `pip install -r requirements/visual.txt` |
+| system | `requirements/system.txt` | 选择 IoT/音视频 | `pip install -r requirements/system.txt` |
+| ai | `requirements/ai.txt` | 选择 AI/LLM | `pip install -r requirements/ai.txt` |
+| perf | `requirements/perf.txt` | 选择性能 | `pip install -r requirements/perf.txt` |
+
+每层文件通过 `-r base.txt` 引用基础依赖,避免重复定义。
+
+## 设计原则
+
+- 不强迫 mobile 用户装 desktop 工具
+- 运行时缺依赖→反问用户是否补装,不静默自动装
+- 补装走 `pip install --upgrade-strategy only-if-needed`
+- `config/requirements.txt` 保留作为全量安装参考
diff --git a/requirements/ai.txt b/requirements/ai.txt
new file mode 100644
index 0000000..7fb4bbc
--- /dev/null
+++ b/requirements/ai.txt
@@ -0,0 +1,10 @@
+# ===== ai — AI/ML 模型 + LLM 测试 =====
+# 安装: pip install -r requirements/ai.txt
+-r base.txt
+
+scikit-learn==1.5.2
+scipy==1.13.1
+# alibi-detect==0.12.0 # 漂移高级检测
+# deepeval==0.20.50 # LLM 评估
+# foolbox==3.3.4 # CV 对抗
+# adversarial-robustness-toolbox==1.17.1
diff --git a/requirements/base.txt b/requirements/base.txt
new file mode 100644
index 0000000..8992cef
--- /dev/null
+++ b/requirements/base.txt
@@ -0,0 +1,64 @@
+# ===== base — 始终安装,测试框架核心 =====
+# 被所有上层 requirements 文件通过 -r base.txt 引用。
+
+# 测试框架核心
+pytest>=9.0.3
+pytest-xdist>=3.6.1
+pytest-rerunfailures>=14.0
+pytest-timeout>=2.3.1
+pytest-cov>=5.0.0
+pytest-mock>=3.14.0
+pytest-playwright>=0.5.2
+allure-pytest>=2.13.5
+pytest-bdd==7.0.0
+
+# UI 自动化
+playwright==1.59.0
+
+# API 测试
+requests==2.33.0
+
+# 测试数据工厂
+faker==20.1.0
+factory-boy==3.3.0
+
+# 数据库
+psycopg2-binary==2.9.12
+pymysql==1.1.3
+SQLAlchemy==2.0.49
+
+# 配置
+PyYAML==6.0.1
+
+# Excel/文档
+openpyxl==3.1.2
+python-docx==1.1.0
+pdfplumber==0.10.3
+pypdf==6.10.2
+beautifulsoup4==4.12.2
+
+# 工具
+python-dotenv==1.2.2
+tenacity==8.2.3
+loguru==0.7.2
+
+# 契约测试
+jsonschema==4.21.0
+
+# 报告
+reportlab==4.0.7
+python-pptx==0.6.23
+
+# 安全扫描
+bandit==1.7.6
+safety==3.0.1
+
+# WebSocket(通用)
+websocket-client==1.8.0
+websockets==12.0
+
+# 进程/系统
+psutil==5.9.6
+
+# HTTP 异步
+httpx[http2]>=0.27.0
diff --git a/requirements/desktop.txt b/requirements/desktop.txt
new file mode 100644
index 0000000..7056e42
--- /dev/null
+++ b/requirements/desktop.txt
@@ -0,0 +1,8 @@
+# ===== desktop — 桌面端测试 =====
+# 安装: pip install -r requirements/desktop.txt
+-r base.txt
+
+pyautogui==0.9.54
+mss>=7.0.1,<10
+# pywinauto==0.6.8 # 仅 Windows — 取消注释启用
+# uiautomation==2.0.20 # 仅 Windows — 取消注释启用
diff --git a/requirements/mobile.txt b/requirements/mobile.txt
new file mode 100644
index 0000000..a09bdf5
--- /dev/null
+++ b/requirements/mobile.txt
@@ -0,0 +1,6 @@
+# ===== mobile — 移动端测试 =====
+# 安装: pip install -r requirements/mobile.txt
+-r base.txt
+
+Appium-Python-Client==5.3.1
+selenium==4.43.0
diff --git a/requirements/perf.txt b/requirements/perf.txt
new file mode 100644
index 0000000..72f7099
--- /dev/null
+++ b/requirements/perf.txt
@@ -0,0 +1,6 @@
+# ===== perf — 性能测试 =====
+# 安装: pip install -r requirements/perf.txt
+-r base.txt
+
+locust==2.43.4
+# mutmut==2.4.5 # 变异测试
diff --git a/requirements/system.txt b/requirements/system.txt
new file mode 100644
index 0000000..1d67294
--- /dev/null
+++ b/requirements/system.txt
@@ -0,0 +1,13 @@
+# ===== system — IoT/音视频/消息队列/区块链 =====
+# 安装: pip install -r requirements/system.txt
+-r base.txt
+
+paramiko==4.0.0
+pyserial==3.5
+paho-mqtt==1.6.1
+ffmpeg-python==0.2.0
+kafka-python==2.0.2
+pika==1.3.2
+# pymodbus==3.5.4 # Modbus 工业协议
+# web3==6.15.1 # 以太坊/EVM
+# slither-analyzer==0.10.0 # 需 solc
diff --git a/requirements/visual.txt b/requirements/visual.txt
new file mode 100644
index 0000000..0ce11d9
--- /dev/null
+++ b/requirements/visual.txt
@@ -0,0 +1,11 @@
+# ===== visual — 视觉/游戏测试 =====
+# 安装: pip install -r requirements/visual.txt
+-r base.txt
+
+opencv-python==4.13.0.92
+scikit-image==0.24.0
+imagehash==4.3.2
+pytesseract==0.3.10
+Pillow==12.2.0
+# airtest==1.4.3 # 冲突 — 需隔离 venv
+# paddleocr==2.7.0.3 # 备选 OCR(重,按需)
diff --git a/runtime/ARCHITECTURE.md b/runtime/ARCHITECTURE.md
new file mode 100644
index 0000000..53523d2
--- /dev/null
+++ b/runtime/ARCHITECTURE.md
@@ -0,0 +1,108 @@
+# 运行时层架构
+
+> 2026-05-11 立项,在不改 14 专家 / 13 Skill / 67 脚本前提下,新增可执行运行时,让"文档+脚本工具箱" → "可被 API/CLI/CI 直接调用的产品"。
+
+## 战略判断
+
+- "全平台/全协议/全测试类型/全行业全覆盖" = 项目死亡信号。Selenium/Postman/k6/JMeter 均单点打透赢
+- 已有 14 专家+13 Skill 编排骨架 = 真护城河,真正稀缺是 **智能编排层 + 数据飞轮**
+- 三阶段串行打通,门槛守严:
+ - **B(M1-M6) QA 团队编排平台** — 摘已有资产最低果实
+ - **A(M7-M12) 开发者自测**(IDE 插件) — 需 B 飞轮数据喂养再打
+ - **C(M13-M18) CI 中间件**(原生集成 Jenkins/GitLab/Argo/Tekton/K8s Operator) — 需 A+B 背书
+
+## 双层架构
+
+| 层 | 内容 | 谁维护 |
+|----|------|--------|
+| **L1 核心闭包** | 测试编排引擎 / 14 专家+调度器 / 输入解析器 / 输出渲染器 / MCP 协议层 / 飞轮 / AI 路由 | 自己 |
+| **L2 扩展面** | 协议适配器市场 / 测试类型 Skill 包 / 行业 Profile / 工具桥 / 报告模板 | 社区/插件/AI 生成 |
+
+## 八维测试矩阵
+
+| 维度 | 取值 |
+|------|------|
+| 平台 | Web/移动/桌面/嵌入式/云原生/中间件/DB/AI模型/区块链/IoT/工控 |
+| 协议 | HTTP(S)/gRPC/WS/TCP/UDP/MQTT/AMQP/Kafka/Redis/SOAP/GraphQL/Modbus/CAN |
+| 测试类型 | 单元/集成/E2E/UI/API/性能/压力/容量/混沌/安全/渗透/模糊/合规/可访问性/兼容/本地化/可用性/视觉回归/契约/可观测 |
+| 流程 | 需求评审 → 用例 → 数据/Mock → 执行 → 缺陷 → 回归 → 上线监控 |
+| 自动化层 | 录制 / 手写 / AI 生成 / AI 自愈 / 自主决策 |
+| 部署 | 本地/Docker/K8s/Serverless/边缘 |
+| Profile | 通用层做厚 + 行业 Profile 留扩展位 |
+| 智能等级 | L0 脚本 → L1 数据驱动 → L2 关键字 → L3 AI 辅助 → L4 自主决策 |
+
+**警告**:不要让维度交叉乘积爆炸成 N^8 测试包;AI 路由按需取交集。
+
+## 6 个 MCP 服务规划
+
+| MCP | 职责 | 状态 |
+|-----|------|------|
+| `mcp-test-orchestrator` | 主调度,被测物→专家组合 | M2 上线 |
+| `mcp-protocol-adapter` | 协议层统一抽象 | M2 上线 |
+| `mcp-evidence-vault` | 证据/录屏/日志 | M2 上线 |
+| `mcp-defect-tracker` | 工单桥(Jira/禅道/PingCode/飞书) | M2 复用现有 |
+| `mcp-knowledge-base` | 历史用例+缺陷+RCA 向量检索 | M2 起步 |
+| `mcp-compliance-checker` | 行业合规规则库(空载,L2 扩展) | M3 |
+
+## 选型
+
+| 项 | 选型 |
+|----|------|
+| LLM 抽象 | **LiteLLM** 多厂商 + Ollama 兜底 + stub(测试) |
+| 编排引擎 | **Prefect 2.x** + 自研 Direct 降级执行器(无 Prefect 也能跑) |
+| 执行器底层 | Pytest 复用(67 脚本本就是 pytest 生态) |
+| DB | Postgres + **pgvector** |
+| 对象存储 | MinIO |
+| 报表 OLAP | ClickHouse(M3 上,M1 不急) |
+| API | FastAPI + Pydantic v2 |
+| CLI | Typer + Rich |
+| 观测 | OpenTelemetry + Loguru |
+| UI | M3 上,M1 仅 CLI |
+| 开源时机 | **M3 上运行时再开源** |
+
+## M1 交付清单
+
+| # | 模块 | 路径 | 状态 |
+|---|------|------|------|
+| 1 | 目录骨架+pyproject | `runtime/` | ✅ |
+| 2 | AI 路由 v1 | `runtime/router/` | ✅ stub 5/5 类型 |
+| 3 | 注册中心 | `runtime/registry/` | ✅ 14+13 实跑验证 |
+| 4 | 编排(Prefect+Direct) | `runtime/orchestrator/` | ✅ E2E 通 |
+| 5 | FastAPI 入口 | `runtime/api/` | ✅ 6 端点 |
+| 6 | Typer CLI | `runtime/cli/` | ✅ `tagent run\|plan\|catalog\|doctor` |
+| 7 | 飞轮 schema | `runtime/storage/` | ✅ 6 表 + Alembic |
+| 8 | OTel+Loguru | `runtime/observability/` | ✅ |
+| 9 | docker-compose | `runtime/docker-compose.yml` | ✅ 含 observability profile |
+| 10 | E2E smoke | 验证脚本 | ✅ 路由 5/5 + DAG 8 节点 direct 模式跑通 |
+| 11 | 文档同步 | 本节 + README + FULL_GUIDE + CHANGELOG + VERSION + 00-导航 | ✅ |
+
+## 八维路由验证
+
+| 输入 | 期望 | 实测 |
+|------|------|------|
+| `Web system https://example.com login flow` | web-system | ✓ web-system + 8 专家 |
+| `REST API gRPC endpoints to test` | rest-api | ✓ rest-api + 6 专家 |
+| `APK mobile Android app` | mobile-app | ✓ mobile-app + mobile-tester |
+| `Windows desktop exe app` | desktop-app | ✓ desktop-app + desktop-tester |
+| `LLM AI model evaluation pipeline` | ai-model | ✓ ai-model + ai-tester |
+
+**stub 准确率 = 5/5 = 100%**(自包含,不出网)。M1 真模型门槛 ≥85%,M2 双模型投票。
+
+## M2 路线图
+
+| 任务 | 内容 |
+|------|------|
+| MCP 6 件套 | `mcp-test-orchestrator/-protocol-adapter/-evidence-vault/-defect-tracker/-knowledge-base/-compliance-checker` 上线 |
+| Web UI | 单页 React:上传被测物 → 看 DAG 实时进度 → 看报告 → 看证据 |
+| 真模型路由 | Claude+Qwen 实测,准确率 ≥85% |
+| 协议适配器 | HTTP/gRPC/WS/MQTT/Kafka 5 协议起步 |
+| 行业 Profile 插槽 | `profiles/general-web.yaml` 示例 + 加 Profile 文档 |
+
+## 放弃条件
+
+- W1 末:骨架+注册没完成 → 慢一周接受
+- W3 末:路由+编排没贯通 → 砍 OTel+ClickHouse,优先打通
+- W5 末:E2E demo 跑不通 → 砍移动/AI 专家,只跑 Web+API
+- W6 末:文档没同步 → **不准 bump 版本**
+- 客户 <2 → 砍 A,固守 B
+- DAU < 1000(A 阶段) → 加固 B,不进 C
diff --git a/runtime/INDEX.md b/runtime/INDEX.md
index 2367aa7..0b22c33 100644
--- a/runtime/INDEX.md
+++ b/runtime/INDEX.md
@@ -1,12 +1,12 @@
# runtime 索引
> Test-Agent 运行时层(V1.1.0 新增)。
-> 顶层导航见根目录 `00-项目导航.md`;runtime 完整章节见 `docs/charter/07-runtime-license.md`;总索引见 `FULL_GUIDE.md`。
+> 顶层导航见根目录 `00-项目导航.md`;运行时完整章节见 `docs/charter/07-runtime-license.md`;架构设计见 [`ARCHITECTURE.md`](ARCHITECTURE.md)。
## 定位
-把 16 专家定义 + 32 业务 Skill + 3 元 Skill + 49 脚本 从"文档+工具箱"升级为"可执行运行时"。
-本层 **不动** `02-专家定义/` `03-技能定义/` `05-代码示例/` 已有内容,仅作调度。
+把 16 专家定义 + 32 业务 Skill + 3 元 Skill + 67 脚本 从"文档+工具箱"升级为"可执行运行时"。
+本层 **不动** `agents/` `skills/` `utils/` 已有内容,仅作调度。
## 模块清单
@@ -46,9 +46,9 @@ runtime/router ← 新增,AI 决策
▼
runtime/orchestrator ← 新增,Prefect 编排
│
- ├─► 02-专家定义/*.md ← 已有,文档→Claude Code 加载
- ├─► 03-技能定义/*.md ← 已有,文档→Skill 调用
- └─► 05-代码示例/*.py ← 已有,49 脚本(adapter 包装)
+ ├─► agents/*.md ← 已有,文档→Claude Code 加载
+ ├─► skills/*.md ← 已有,文档→Skill 调用
+ └─► utils/*.py ← 已有,67 脚本(adapter 包装)
│
▼
runtime/storage 飞轮 ← 新增,数据沉淀
diff --git a/runtime/__init__.py b/runtime/__init__.py
index 7ece93d..b64353d 100644
--- a/runtime/__init__.py
+++ b/runtime/__init__.py
@@ -1,7 +1,7 @@
-"""Test-Agent runtime layer (V1.32.5).
+"""Test-Agent runtime layer (V1.40.0).
AI router + Prefect orchestrator + FastAPI/CLI entry + flywheel storage.
-Wraps 16 experts + 32 skills + 49 utils without modifying them.
+Wraps 16 experts + 32 skills + 76 utils without modifying them.
"""
-__version__ = "1.32.5"
+__version__ = "1.40.0"
diff --git a/runtime/api/correlation.py b/runtime/api/correlation.py
index 2428617..6a2555c 100644
--- a/runtime/api/correlation.py
+++ b/runtime/api/correlation.py
@@ -12,7 +12,6 @@
from starlette.requests import Request
from starlette.responses import Response
-
HEADER_REQUEST_ID = "X-Request-ID"
HEADER_CORRELATION_ID = "X-Correlation-ID"
diff --git a/runtime/api/deps.py b/runtime/api/deps.py
index 96d47ad..ac02c3c 100644
--- a/runtime/api/deps.py
+++ b/runtime/api/deps.py
@@ -2,7 +2,6 @@
from __future__ import annotations
-import json
from pathlib import Path
from typing import Any
@@ -14,8 +13,8 @@
from runtime.router.llm_client import LLMClient
from runtime.router.router import route
from runtime.router.schema import RoutingDecision, TargetArtifact
-from runtime.storage.repo import create_run, set_run_status
from runtime.storage.models import RunStatus
+from runtime.storage.repo import create_run, set_run_status
class Kernel:
diff --git a/runtime/api/endpoints/stream.py b/runtime/api/endpoints/stream.py
index 90b8d9d..6d86181 100644
--- a/runtime/api/endpoints/stream.py
+++ b/runtime/api/endpoints/stream.py
@@ -6,7 +6,7 @@
from __future__ import annotations
import asyncio
-import json
+import contextlib
import time
from typing import Any
@@ -44,9 +44,7 @@ def expired(self) -> bool:
def get_or_create_stream(run_id: str) -> RunStream:
- if run_id not in _streams:
- _streams[run_id] = RunStream(run_id)
- return _streams[run_id]
+ return _streams[run_id] if run_id in _streams else _streams.setdefault(run_id, RunStream(run_id))  # only construct a RunStream on a miss
def push_node_event(run_id: str, node_id: str, status: str, output: dict | None = None) -> None:
@@ -54,15 +52,13 @@ def push_node_event(run_id: str, node_id: str, status: str, output: dict | None
stream = _streams.get(run_id)
if stream is None:
return
- try:
+ with contextlib.suppress(RuntimeError):
asyncio.ensure_future(stream.push({
"type": "node_update",
"node_id": node_id,
"status": status, # pending | running | done | failed | skipped
"output": output,
}))
- except RuntimeError:
- pass # No running event loop — stream not active
def push_run_complete(run_id: str, ok: bool, summary: dict | None = None) -> None:
@@ -70,14 +66,12 @@ def push_run_complete(run_id: str, ok: bool, summary: dict | None = None) -> Non
stream = _streams.get(run_id)
if stream is None:
return
- try:
+ with contextlib.suppress(RuntimeError):
asyncio.ensure_future(stream.push({
"type": "run_complete",
"ok": ok,
"summary": summary,
}))
- except RuntimeError:
- pass
def cleanup_stream(run_id: str) -> None:
@@ -103,10 +97,8 @@ async def stream_run(websocket: WebSocket, run_id: str):
await websocket.send_json({"type": "heartbeat", "run_id": run_id})
# Check if client disconnected
- try:
+ with contextlib.suppress(asyncio.TimeoutError):
_ = await asyncio.wait_for(websocket.receive_text(), timeout=0.01)
- except asyncio.TimeoutError:
- pass
except WebSocketDisconnect:
logger.info("WebSocket stream disconnected for run {}", run_id)
diff --git a/runtime/api/main.py b/runtime/api/main.py
index f7c05de..426ebcb 100644
--- a/runtime/api/main.py
+++ b/runtime/api/main.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+import secrets
import tempfile
import threading
from pathlib import Path
@@ -13,22 +14,24 @@
from loguru import logger
from runtime import __version__
+from runtime.api.correlation import CorrelationMiddleware
from runtime.api.deps import Kernel
-from runtime.api.models import CatalogResponse, RunCreateText, RunCreated, RunStatus as RunStatusModel
+from runtime.api.endpoints.cancel import router as cancel_router
+from runtime.api.endpoints.stream import router as stream_router
+from runtime.api.models import CatalogResponse, RunCreated, RunCreateText
+from runtime.api.models import RunStatus as RunStatusModel
from runtime.api.parsers import parse_path, parse_text, parse_url
+from runtime.api.result_store import ResultStore
from runtime.config.settings import get_settings
from runtime.observability.prometheus_metrics import create_metrics_router
-from runtime.api.correlation import CorrelationMiddleware
-from runtime.api.endpoints.cancel import router as cancel_router, register_run, unregister_run
-from runtime.api.endpoints.stream import router as stream_router
-from runtime.api.result_store import ResultStore
_settings = get_settings()
app = FastAPI(title="Test-Agent Runtime", version=__version__)
app.add_middleware(
CORSMiddleware,
- allow_origins=["http://localhost:*", "http://127.0.0.1:*", "tauri://localhost"],
+ allow_origins=["tauri://localhost"],
+ allow_origin_regex=r"https?://(localhost|127\.0\.0\.1)(:\d+)?",
allow_methods=["GET", "POST"],
allow_headers=["Content-Type", "Authorization"],
)
@@ -48,7 +51,7 @@ async def auth_middleware(request: Request, call_next: Any) -> Any:
token = _settings.api_auth_token
if token and request.url.path not in ("/health", "/docs", "/openapi.json"):
auth = request.headers.get("Authorization", "")
- if not auth or auth.removeprefix("Bearer ") != token:
+ if not auth or not secrets.compare_digest(auth.removeprefix("Bearer ").encode(), token.encode()):  # compare bytes: compare_digest raises TypeError on non-ASCII str
return JSONResponse(status_code=401, content={"detail": "unauthorized"})
return await call_next(request)
@@ -93,7 +96,7 @@ def run_text(payload: RunCreateText, bg: BackgroundTasks, mode: str = "exec", la
@app.post("/run/file", response_model=RunCreated)
-async def run_file(file: UploadFile = File(..., max_length=50_000_000), extra: str = Form("")) -> RunCreated:
+async def run_file(file: UploadFile = File(..., max_length=50_000_000), bg: BackgroundTasks = None, extra: str = Form("")) -> RunCreated: # type: ignore[assignment] # noqa: B008
suffix = Path(file.filename or "upload").suffix.lower()
allowed = {".md", ".txt", ".pdf", ".docx", ".xlsx", ".zip", ".png", ".jpg", ".jpeg", ".html", ".json", ".yml", ".yaml", ".py", ".js", ".ts", ".apk", ".ipa"}
if suffix not in allowed:
@@ -105,10 +108,7 @@ async def run_file(file: UploadFile = File(..., max_length=50_000_000), extra: s
if extra:
art.text = (art.text or "") + "\n\n# User note:\n" + extra
run_id, decision = _kernel.submit(art)
- # Kick off in same process pool; fire-and-forget for v1 simplicity.
- import threading
-
- threading.Thread(target=_run_in_background, args=(run_id, decision), daemon=True).start()
+ bg.add_task(_run_in_background, run_id, decision)
return RunCreated(
run_id=run_id,
decision_summary={
@@ -207,7 +207,7 @@ def list_history() -> dict:
"duration_s": data.get("duration_s", data.get("duration_ms", 0) / 1000 if "duration_ms" in data else 0),
"confidence": data.get("confidence", 0),
})
- except (OSError, json.JSONDecodeError, ValueError) as e:
+ except (OSError, _json.JSONDecodeError, ValueError) as e:
logger.warning("skipping unreadable run file {}: {}", f, e)
return {"runs": runs[:50]}
@@ -226,12 +226,12 @@ def _run_in_background(run_id: str, decision) -> None:
try:
summary = _kernel.execute_sync(run_id, decision)
with _run_lock:
- _run_results[run_id] = summary
+ _run_results.put(run_id, summary)
except Exception: # noqa: BLE001
logger.exception("background run {} failed", run_id)
with _run_lock:
- _run_results[run_id] = {
+ _run_results.put(run_id, {
"error": f"run {run_id} failed — check logs at workspace/ or run with --debug",
"run_id": run_id,
"failed": 1, "succeeded": 0, "total": 0, "status": "error",
- }
+ })
diff --git a/runtime/api/rbac.py b/runtime/api/rbac.py
index 32f954e..570a2b0 100644
--- a/runtime/api/rbac.py
+++ b/runtime/api/rbac.py
@@ -14,9 +14,9 @@ def admin_only(): ...
from __future__ import annotations
import os
+from collections.abc import Callable
from enum import Enum
from functools import wraps
-from typing import Callable, List, Optional
from loguru import logger
@@ -65,7 +65,7 @@ def _rbac_enabled() -> bool:
return os.getenv("TAGENT_RBAC_ENABLED", "0") == "1"
-def resolve_role(token: str) -> Optional[Role]:
+def resolve_role(token: str) -> Role | None:
"""Resolve a bearer token to a role. Returns None if RBAC disabled or token unknown."""
if not _rbac_enabled():
return Role.ADMIN # when off, everyone is admin (backward compat)
diff --git a/runtime/api/result_store.py b/runtime/api/result_store.py
index 64c1cff..a03c03b 100644
--- a/runtime/api/result_store.py
+++ b/runtime/api/result_store.py
@@ -21,7 +21,7 @@ def __init__(self, max_entries: int = 1000, ttl_seconds: int = 86400) -> None:
self._max = max_entries
self._ttl = ttl_seconds
self._store: OrderedDict[str, tuple[float, dict[str, Any]]] = OrderedDict()
- self._lock = threading.Lock()
+ self._lock = threading.RLock()
def put(self, run_id: str, result: dict[str, Any]) -> None:
"""Store a result. Evicts oldest if over capacity."""
diff --git a/runtime/api/tenancy.py b/runtime/api/tenancy.py
index cbb513f..3180ebb 100644
--- a/runtime/api/tenancy.py
+++ b/runtime/api/tenancy.py
@@ -13,9 +13,6 @@
import contextvars
import os
-from typing import Optional
-
-from loguru import logger
_current_tenant: contextvars.ContextVar[str | None] = contextvars.ContextVar(
"current_tenant", default=None
diff --git a/runtime/backends/__init__.py b/runtime/backends/__init__.py
index c20c931..aad95b2 100644
--- a/runtime/backends/__init__.py
+++ b/runtime/backends/__init__.py
@@ -4,16 +4,15 @@
Use `get_backend(name)` to obtain an adapter implementing BaseExecutionEnv.
"""
-from runtime.backends.base import BaseExecutionEnv, REGISTRY, get_backend, register # noqa: F401
-
# 触发 7 个 backend 的 @register("xxx") 装饰器,填充 REGISTRY
# 不导入这些模块 → REGISTRY 永空 → get_backend("local") KeyError 启动崩 (W4-4 修)
from runtime.backends import ( # noqa: F401, E402
- local,
+ daytona,
docker,
- ssh,
- singularity,
+ local,
modal,
- daytona,
+ singularity,
+ ssh,
vercel_sandbox,
)
+from runtime.backends.base import REGISTRY, BaseExecutionEnv, get_backend, register # noqa: F401
diff --git a/runtime/backends/daytona.py b/runtime/backends/daytona.py
index 923ff3d..fff0e52 100644
--- a/runtime/backends/daytona.py
+++ b/runtime/backends/daytona.py
@@ -5,8 +5,6 @@
import time
from pathlib import Path
-from loguru import logger
-
from runtime.backends.base import BaseExecutionEnv, ExecResult, register
diff --git a/runtime/backends/docker.py b/runtime/backends/docker.py
index 46c16b3..489a639 100644
--- a/runtime/backends/docker.py
+++ b/runtime/backends/docker.py
@@ -39,7 +39,7 @@ async def exec(self, cmd: str, *, timeout: float = 60.0, cwd: str | None = None,
argv += ["-w", cwd]
for k, v in (env or {}).items():
argv += ["-e", f"{k}={v}"]
- argv += [self.container, "sh", "-lc", cmd]
+ argv += [self.container, "sh", "-lc", cmd]  # cmd is already one argv element for `sh -c`; shlex.quote() here would make sh treat the whole command line as a single word
rc, out, err = await self._run(argv, timeout=timeout)
return ExecResult(ok=rc == 0, stdout=out, stderr=err, returncode=rc, elapsed_ms=int((time.monotonic() - start) * 1000))
diff --git a/runtime/backends/ssh.py b/runtime/backends/ssh.py
index aed6a49..9f1d42a 100644
--- a/runtime/backends/ssh.py
+++ b/runtime/backends/ssh.py
@@ -27,7 +27,7 @@ async def connect(self) -> None:
except ImportError as e:
raise RuntimeError("asyncssh not installed; pip install asyncssh") from e
self._conn = await asyncssh.connect(
- self.host, port=self.port, username=self.user, client_keys=[self.key] if self.key else None, password=self.password, known_hosts=()
+ self.host, port=self.port, username=self.user, client_keys=[self.key] if self.key else None, password=self.password, known_hosts=()  # () keeps asyncssh's default known_hosts lookup; None would disable host-key verification
)
logger.info("SSH connected: {}@{}:{}", self.user, self.host, self.port)
@@ -35,7 +35,7 @@ async def exec(self, cmd: str, *, timeout: float = 60.0, cwd: str | None = None,
start = time.monotonic()
full = cmd
if cwd:
- full = f"cd {shlex.quote(cwd)} && {cmd}"
+ full = f"cd {shlex.quote(cwd)} && {cmd}"  # cmd is a shell command line, not a single word: quoting it would break any command that has arguments
if env:
env_str = " ".join(f"{shlex.quote(k)}={shlex.quote(v)}" for k, v in env.items())
full = f"{env_str} {full}"
@@ -52,14 +52,12 @@ async def exec(self, cmd: str, *, timeout: float = 60.0, cwd: str | None = None,
return ExecResult(ok=False, stdout="", stderr=str(e), returncode=None, elapsed_ms=int((time.monotonic() - start) * 1000))
async def read(self, path: str) -> bytes:
- async with self._conn.start_sftp_client() as sftp:
- async with sftp.open(path, "rb") as f:
- return await f.read()
+ async with self._conn.start_sftp_client() as sftp, sftp.open(path, "rb") as f:
+ return await f.read()
async def write(self, path: str, data: bytes) -> None:
- async with self._conn.start_sftp_client() as sftp:
- async with sftp.open(path, "wb") as f:
- await f.write(data)
+ async with self._conn.start_sftp_client() as sftp, sftp.open(path, "wb") as f:
+ await f.write(data)
async def sync_in(self, local: Path, remote: str) -> None:
async with self._conn.start_sftp_client() as sftp:
diff --git a/runtime/backends/vercel_sandbox.py b/runtime/backends/vercel_sandbox.py
index 8672d2a..0f5777d 100644
--- a/runtime/backends/vercel_sandbox.py
+++ b/runtime/backends/vercel_sandbox.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+import contextlib
import time
from pathlib import Path
@@ -82,10 +83,8 @@ async def sync_out(self, remote: str, local: Path) -> None:
async def close(self) -> None:
if self._client and self._sandbox_id:
- try:
+ with contextlib.suppress(Exception):
await self._client.delete(f"/v1/sandboxes/{self._sandbox_id}")
- except Exception:
- pass
if self._client:
await self._client.aclose()
self._client = None
diff --git a/runtime/cli/_shared.py b/runtime/cli/_shared.py
index 99158e1..5d1df2d 100644
--- a/runtime/cli/_shared.py
+++ b/runtime/cli/_shared.py
@@ -2,18 +2,15 @@
from __future__ import annotations
-import json
import os
import sys
from pathlib import Path
-import typer
from rich.console import Console
from rich.table import Table
from runtime.api.deps import Kernel
from runtime.api.parsers import parse_path, parse_text, parse_url
-from runtime.config.settings import get_settings
# Fix Unicode and SSL on Windows
if sys.platform == "win32":
@@ -130,6 +127,7 @@ def print_dag(decision):
def ping_db():
try:
from sqlalchemy import text
+
from runtime.storage.db import get_engine
with get_engine().connect() as c:
c.execute(text("SELECT 1"))
diff --git a/runtime/cli/commands/catalog.py b/runtime/cli/commands/catalog.py
index 529012f..bbaeb3d 100644
--- a/runtime/cli/commands/catalog.py
+++ b/runtime/cli/commands/catalog.py
@@ -5,7 +5,7 @@
import typer
from rich.table import Table
-from runtime.cli._shared import console, _kernel
+from runtime.cli._shared import _kernel, console
def register(app: typer.Typer) -> None:
diff --git a/runtime/cli/commands/demo.py b/runtime/cli/commands/demo.py
index 76fc536..14fc61a 100644
--- a/runtime/cli/commands/demo.py
+++ b/runtime/cli/commands/demo.py
@@ -10,7 +10,7 @@
import typer
from runtime.api.parsers import parse_path
-from runtime.cli._shared import console, _SMOKE_PRD_FIXTURE
+from runtime.cli._shared import _SMOKE_PRD_FIXTURE, console
def register(app: typer.Typer) -> None:
@@ -32,9 +32,8 @@ def demo(
provider = os.getenv("TAGENT_LLM_PROVIDER", "(unset)")
console.print(f"[bold yellow]⚠ --real-llm mode[/] provider={provider}")
console.print(" · Real LLM calls ~$1-3 / 60-120s (16 agents × multi-turn)")
- if not yes:
- if not typer.confirm(" Continue? (N=exit)", default=False):
- raise typer.Exit(0)
+ if not yes and not typer.confirm(" Continue? (N=exit)", default=False):
+ raise typer.Exit(0)
if not skip_smoke:
from runtime.healthcheck.llm_smoke import run_llm_smoke
console.print("\n[bold]Pre-flight · doctor --llm-smoke (single round-trip)[/]")
diff --git a/runtime/cli/commands/doctor.py b/runtime/cli/commands/doctor.py
index 61c40e2..f5504d9 100644
--- a/runtime/cli/commands/doctor.py
+++ b/runtime/cli/commands/doctor.py
@@ -4,7 +4,7 @@
import typer
-from runtime.cli._shared import console, ping_db, ping_minio, _kernel
+from runtime.cli._shared import _kernel, console, ping_db, ping_minio
from runtime.config.settings import get_settings
diff --git a/runtime/cli/commands/export.py b/runtime/cli/commands/export.py
index 036efdc..ab7ed05 100644
--- a/runtime/cli/commands/export.py
+++ b/runtime/cli/commands/export.py
@@ -19,9 +19,9 @@ def export(
out_dir: str = typer.Option("workspace/testcases", "--out-dir", help="output dir when --format all"),
):
"""Export TestCaseTree to xmind / markmap / opml / all."""
- from runtime.exporters import xmind as _x # noqa: F401
from runtime.exporters import markmap as _m # noqa: F401
from runtime.exporters import opml as _o # noqa: F401
+ from runtime.exporters import xmind as _x # noqa: F401
from runtime.exporters.base import REGISTRY, get_exporter
plan_path = Path(plan)
diff --git a/runtime/cli/commands/init.py b/runtime/cli/commands/init.py
index 3ffa116..1ab0817 100644
--- a/runtime/cli/commands/init.py
+++ b/runtime/cli/commands/init.py
@@ -43,7 +43,7 @@ def init(
res = render_all(answers, Path(out), matrix=matrix, overwrite=overwrite)
except FileExistsError as e:
console.print(f"[red]{e}[/]")
- raise typer.Exit(2)
+ raise typer.Exit(2) from e
console.print("\n[bold green]✓ config generated[/]")
console.print(f" .env → {res.env_path}")
diff --git a/runtime/cli/commands/readiness.py b/runtime/cli/commands/readiness.py
index 1d37523..ef4f8b0 100644
--- a/runtime/cli/commands/readiness.py
+++ b/runtime/cli/commands/readiness.py
@@ -4,7 +4,6 @@
import json
from pathlib import Path
-from typing import Optional
import typer
from rich.panel import Panel
@@ -17,12 +16,12 @@ def register(app: typer.Typer) -> None:
@app.command()
def readiness(
- smoke: float = typer.Option(1.0, "--smoke", help="Smoke pass rate (0-1)"),
- regression: float = typer.Option(1.0, "--regression", help="Regression pass rate (0-1)"),
- perf_ok: bool = typer.Option(False, "--perf-ok", help="Performance gate passed"),
- security_ok: bool = typer.Option(False, "--security-ok", help="Security gate passed"),
- p0_bugs: int = typer.Option(0, "--p0-bugs", help="P0 bug count"),
- from_summary: Optional[Path] = typer.Option(None, "--from-summary", help="Run summary JSON path"),
+ smoke: float = typer.Option(1.0, "--smoke", help="Smoke pass rate (0-1)"), # noqa: B008
+ regression: float = typer.Option(1.0, "--regression", help="Regression pass rate (0-1)"), # noqa: B008
+ perf_ok: bool = typer.Option(False, "--perf-ok", help="Performance gate passed"), # noqa: B008
+ security_ok: bool = typer.Option(False, "--security-ok", help="Security gate passed"), # noqa: B008
+ p0_bugs: int = typer.Option(0, "--p0-bugs", help="P0 bug count"), # noqa: B008
+ from_summary: Path | None = typer.Option(None, "--from-summary", help="Run summary JSON path"), # noqa: B008
) -> None:
"""Weighted release readiness score (smoke×0.4 + regression×0.3 + perf×0.2 + security×0.1)."""
if from_summary:
diff --git a/runtime/cli/commands/run.py b/runtime/cli/commands/run.py
index 1ad2600..11835bc 100644
--- a/runtime/cli/commands/run.py
+++ b/runtime/cli/commands/run.py
@@ -7,7 +7,7 @@
import typer
-from runtime.cli._shared import build_artifact, console, print_dag, _kernel
+from runtime.cli._shared import _kernel, build_artifact, console, print_dag
from runtime.tutor.i18n import set_lang
from runtime.tutor.verbosity import set_mode
@@ -41,8 +41,8 @@ def run(
@app.command()
def plan(
target: str = typer.Argument(...),
- note: str = typer.Option("", "--note"),
- out: Path | None = typer.Option(None, "--out", help="write decision JSON to file"),
+ note: str = typer.Option("", "--note"), # noqa: B008
+ out: Path | None = typer.Option(None, "--out", help="write decision JSON to file"), # noqa: B008
):
"""Plan only (no execution)."""
art = build_artifact(target, note)
diff --git a/runtime/cli/commands/selftest.py b/runtime/cli/commands/selftest.py
index 1c7e5ba..72192b2 100644
--- a/runtime/cli/commands/selftest.py
+++ b/runtime/cli/commands/selftest.py
@@ -7,7 +7,7 @@
import typer
from runtime.api.parsers import parse_path
-from runtime.cli._shared import console, _kernel
+from runtime.cli._shared import _kernel, console
def register(app: typer.Typer) -> None:
diff --git a/runtime/cli/config.py b/runtime/cli/config.py
index 897ba14..f3ad367 100644
--- a/runtime/cli/config.py
+++ b/runtime/cli/config.py
@@ -121,7 +121,7 @@ def cmd_list() -> None:
for name, info in COMPAT_EXAMPLES.items():
typer.echo(f" {name:18s} {info}")
typer.echo("")
- typer.echo("📖 Full cookbook: 04-配置文件/llm-providers.md")
+ typer.echo("📖 Full cookbook: config/llm-providers.md")
@config_app.command("show")
@@ -215,4 +215,4 @@ def cmd_unset(
_write_env(env_path, env)
typer.echo(f"✅ 已移除 {key} (原值: {old_value})")
typer.echo(f" 备份: {env_path}.bak")
- typer.echo(f" 下一步: tagent config use 重设, 或 tagent config show 验证")
+ typer.echo(" 下一步: tagent config use 重设, 或 tagent config show 验证")
diff --git a/runtime/cli/main.py b/runtime/cli/main.py
index b06598f..dbfa434 100644
--- a/runtime/cli/main.py
+++ b/runtime/cli/main.py
@@ -29,16 +29,16 @@ def _version_callback(
# Register command modules
-from runtime.cli.commands.bootstrap import register as _reg_bootstrap
-from runtime.cli.commands.catalog import register as _reg_catalog
-from runtime.cli.commands.demo import register as _reg_demo
-from runtime.cli.commands.doctor import register as _reg_doctor
-from runtime.cli.commands.export import register as _reg_export
-from runtime.cli.commands.init import register as _reg_init
-from runtime.cli.commands.market import register as _reg_market
-from runtime.cli.commands.readiness import register as _reg_readiness
-from runtime.cli.commands.run import register_run as _reg_run
-from runtime.cli.commands.selftest import register as _reg_selftest
+from runtime.cli.commands.bootstrap import register as _reg_bootstrap # noqa: E402
+from runtime.cli.commands.catalog import register as _reg_catalog # noqa: E402
+from runtime.cli.commands.demo import register as _reg_demo # noqa: E402
+from runtime.cli.commands.doctor import register as _reg_doctor # noqa: E402
+from runtime.cli.commands.export import register as _reg_export # noqa: E402
+from runtime.cli.commands.init import register as _reg_init # noqa: E402
+from runtime.cli.commands.market import register as _reg_market # noqa: E402
+from runtime.cli.commands.readiness import register as _reg_readiness # noqa: E402
+from runtime.cli.commands.run import register_run as _reg_run # noqa: E402
+from runtime.cli.commands.selftest import register as _reg_selftest # noqa: E402
_reg_bootstrap(app)
_reg_catalog(app)
diff --git a/runtime/compliance/engine.py b/runtime/compliance/engine.py
index 22ec691..fe47a98 100644
--- a/runtime/compliance/engine.py
+++ b/runtime/compliance/engine.py
@@ -11,7 +11,6 @@
from __future__ import annotations
import json
-import os
import re
from dataclasses import dataclass, field
from pathlib import Path
@@ -218,11 +217,6 @@ def _evaluate_profile(profile: dict[str, Any]) -> ComplianceReport:
report.manual += 1
# Run auto-checks
- one_time = os.getcwd
- try:
- os.getcwd = lambda: str(Path.cwd()) # no-op, use actual cwd
- except Exception:
- pass
for auto_fn in AUTO_CHECKS:
result = auto_fn()
report.results.append(result)
diff --git a/runtime/compliance/eu_ai_act.py b/runtime/compliance/eu_ai_act.py
index dfa0ee8..ab70662 100644
--- a/runtime/compliance/eu_ai_act.py
+++ b/runtime/compliance/eu_ai_act.py
@@ -20,12 +20,7 @@
from __future__ import annotations
import json
-import os
-import re
-from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any
-
# ═══════════════════════════════════════════════════════════════
# Annex III: High-Risk Classification
diff --git a/runtime/config/safety.py b/runtime/config/safety.py
index 220985d..ca7d451 100644
--- a/runtime/config/safety.py
+++ b/runtime/config/safety.py
@@ -6,7 +6,6 @@
from __future__ import annotations
from functools import lru_cache
-from pathlib import Path
from typing import Any
import yaml
diff --git a/runtime/config/settings.py b/runtime/config/settings.py
index 9fdb1bd..88bfadb 100644
--- a/runtime/config/settings.py
+++ b/runtime/config/settings.py
@@ -30,9 +30,9 @@ class Settings(BaseSettings):
)
project_root: Path = Field(default_factory=_get_project_root)
- experts_dir: Path = Field(default=Path("02-专家定义"))
- skills_dir: Path = Field(default=Path("03-技能定义"))
- scripts_dir: Path = Field(default=Path("05-代码示例"))
+ experts_dir: Path = Field(default=Path("agents"))
+ skills_dir: Path = Field(default=Path("skills"))
+ scripts_dir: Path = Field(default=Path("utils"))
workspace_dir: Path = Field(default=Path("workspace"))
llm_provider: str = Field(default="claude")
@@ -83,6 +83,14 @@ class Settings(BaseSettings):
docker_host: str = Field(default="")
ci_mode: bool = Field(default=False)
+ def model_post_init(self, _context: object) -> None:
+ """Resolve relative Path fields to absolute after model init."""
+ root = self.project_root
+ for attr in ("experts_dir", "skills_dir", "scripts_dir", "workspace_dir"):
+ p = getattr(self, attr)
+ if not p.is_absolute():
+ object.__setattr__(self, attr, (root / p).resolve())
+
def resolve(self, rel: Path) -> Path:
return rel if rel.is_absolute() else (self.project_root / rel).resolve()
diff --git a/runtime/docker-compose.app.yml b/runtime/docker-compose.app.yml
index 426f22f..8440723 100644
--- a/runtime/docker-compose.app.yml
+++ b/runtime/docker-compose.app.yml
@@ -10,12 +10,12 @@ services:
minio:
condition: service_healthy
environment:
- TAGENT_DB_URL: postgresql://tagent:tagent@postgres:5432/tagent
- TAGENT_MINIO_ENDPOINT: minio:9000
- TAGENT_MINIO_ACCESS_KEY: tagent
- TAGENT_MINIO_SECRET_KEY: tagent-secret
- TAGENT_LLM_PROVIDER: stub
- TAGENT_LOG_LEVEL: INFO
+ TAGENT_DB_URL: ${TAGENT_DB_URL:-postgresql://tagent:tagent@postgres:5432/tagent}
+ TAGENT_MINIO_ENDPOINT: ${TAGENT_MINIO_ENDPOINT:-minio:9000}
+ TAGENT_MINIO_ACCESS_KEY: ${TAGENT_MINIO_ACCESS_KEY:-tagent}
+ TAGENT_MINIO_SECRET_KEY: ${TAGENT_MINIO_SECRET_KEY:-tagent-secret}
+ TAGENT_LLM_PROVIDER: ${TAGENT_LLM_PROVIDER:-stub}
+ TAGENT_LOG_LEVEL: ${TAGENT_LOG_LEVEL:-INFO}
ports:
- "8800:8800"
volumes:
diff --git a/runtime/essence_watcher/INDEX.md b/runtime/essence_watcher/INDEX.md
index a3b404b..4a79423 100644
--- a/runtime/essence_watcher/INDEX.md
+++ b/runtime/essence_watcher/INDEX.md
@@ -25,7 +25,7 @@
c. 写 upstream update 文件
d. 标 confidence: llm-draft-unreviewed
5. 应用 policy.yaml:
- - skill-related delta → 提议入 03-技能定义/
+ - skill-related delta → 提议入 skills/
- rule-related delta → 提议入主宪章 § 待审
- 其他 → 仅入 upstream 不动 Test-Agent
```
@@ -46,7 +46,7 @@ essence_watcher:
```yaml
# 哪些 delta 自动提议入 Test-Agent
auto_propose:
- - skill_definitions # 新 skill 名字 / 描述 / 元数据 → 提议 03-技能定义/
+ - skill_definitions # 新 skill 名字 / 描述 / 元数据 → 提议 skills/
- charter_rules # 主宪章规则更新 → 提议 主宪章
- safety_patterns # 防护模式 → 提议 §24 safe-by-default
- test_methodology # 测试方法论新增 → 提议 §17/§21
diff --git a/runtime/essence_watcher/apply_policy.example.yaml b/runtime/essence_watcher/apply_policy.example.yaml
index 849d2c3..3a577e8 100644
--- a/runtime/essence_watcher/apply_policy.example.yaml
+++ b/runtime/essence_watcher/apply_policy.example.yaml
@@ -1,6 +1,6 @@
# essence_watcher 选择性应用 policy(主宪章 §29)
#
-# 实际部署:cp 到 D:/项目文件/_精髓库/_apply_policy.yaml 启用
+# 实际部署:cp 到 <精髓库目录>/_apply_policy.yaml 启用
# 默认所有 delta 仅入精髓库,不动 Test-Agent.
# 自动提议入 Test-Agent(待审)
diff --git a/runtime/essence_watcher/delta_extractor.py b/runtime/essence_watcher/delta_extractor.py
index e38f7f8..1aacffc 100644
--- a/runtime/essence_watcher/delta_extractor.py
+++ b/runtime/essence_watcher/delta_extractor.py
@@ -3,7 +3,6 @@
from __future__ import annotations
import base64
-import json
import subprocess
from datetime import datetime, timezone
from pathlib import Path
@@ -85,7 +84,7 @@ def extract_delta(essence_name: str, repo_url: str, prev_sha: str | None, new_sh
except Exception as e:
logger.warning("LLM delta extraction failed: {}", e)
return {
- "delta_summary": f"LLM unavailable, manual review required",
+ "delta_summary": "LLM unavailable, manual review required",
"new_skills": [],
"new_rules": [],
"new_test_methodology": [],
@@ -112,11 +111,11 @@ def write_update_report(essence_name: str, repo_url: str, prev_sha: str | None,
f"## Summary\n{delta.get('delta_summary', '(none)')}\n\n"
f"## Applies to Test-Agent?\n**{delta.get('applies_to_test_agent', False)}** (LLM confidence: {delta.get('confidence', 'low')})\n\n"
f"## New skills\n" + "\n".join(f"- {s}" for s in delta.get("new_skills", [])) + "\n\n"
- f"## New rules\n" + "\n".join(f"- {s}" for s in delta.get("new_rules", [])) + "\n\n"
- f"## New test methodology\n" + "\n".join(f"- {s}" for s in delta.get("new_test_methodology", [])) + "\n\n"
- f"## Evidence(原文引用)\n" + "\n".join(f"> {e}" for e in delta.get("evidence", [])) + "\n\n"
- f"---\n"
- f"**Action required**: 用户审 → 改 `confidence: high/medium/low` + 填 `reviewer/last_reviewed`;若 applies_to_test_agent → 触发 Test-Agent 集成 PR;否则仅入 upstream 即结束。\n",
+ "## New rules\n" + "\n".join(f"- {s}" for s in delta.get("new_rules", [])) + "\n\n"
+ "## New test methodology\n" + "\n".join(f"- {s}" for s in delta.get("new_test_methodology", [])) + "\n\n"
+ "## Evidence(原文引用)\n" + "\n".join(f"> {e}" for e in delta.get("evidence", [])) + "\n\n"
+ "---\n"
+ "**Action required**: 用户审 → 改 `confidence: high/medium/low` + 填 `reviewer/last_reviewed`;若 applies_to_test_agent → 触发 Test-Agent 集成 PR;否则仅入 upstream 即结束。\n",
encoding="utf-8",
)
return target
diff --git a/runtime/essence_watcher/runner.py b/runtime/essence_watcher/runner.py
index a883c34..95b4965 100644
--- a/runtime/essence_watcher/runner.py
+++ b/runtime/essence_watcher/runner.py
@@ -11,7 +11,7 @@
from loguru import logger
-from runtime.config.safety import SafeByDefaultBlocked, gate_curator_run, get_setting, is_allowed
+from runtime.config.safety import SafeByDefaultBlocked, is_allowed
from runtime.essence_watcher.delta_extractor import extract_delta, write_update_report
from runtime.essence_watcher.parser import list_repos
from runtime.essence_watcher.tracker import detect_changes
diff --git a/runtime/exporters/__init__.py b/runtime/exporters/__init__.py
index 63a0d04..8a29e67 100644
--- a/runtime/exporters/__init__.py
+++ b/runtime/exporters/__init__.py
@@ -6,4 +6,4 @@
Registered exporters expose `.export(tree: TestCaseTree, target: Path) -> Path`.
"""
-from runtime.exporters.base import Exporter, REGISTRY, TestCaseNode, TestCaseTree, register # noqa: F401
+from runtime.exporters.base import REGISTRY, Exporter, TestCaseNode, TestCaseTree, register # noqa: F401
diff --git a/runtime/exporters/base.py b/runtime/exporters/base.py
index 1d3ee2a..acaeb7b 100644
--- a/runtime/exporters/base.py
+++ b/runtime/exporters/base.py
@@ -25,7 +25,7 @@ class TestCaseNode:
expected: list[str] = field(default_factory=list)
notes: str = ""
tags: list[str] = field(default_factory=list)
- children: list["TestCaseNode"] = field(default_factory=list)
+ children: list[TestCaseNode] = field(default_factory=list)
id: str = "" # optional,LLM 可不填,exporter 自动生成
diff --git a/runtime/gateway/__init__.py b/runtime/gateway/__init__.py
index ff1bdfe..707ec63 100644
--- a/runtime/gateway/__init__.py
+++ b/runtime/gateway/__init__.py
@@ -3,8 +3,7 @@
Single gateway process serves N platforms. Cross-platform conversation continuity.
"""
-from runtime.gateway.base import REGISTRY, Platform, get_platform, register # noqa: F401
-
# 触发 8 个 platform 子模块 @register("xxx") 装饰器加载,填充 REGISTRY
# 不导入 platforms 包 → REGISTRY 永空 → get_platform("feishu") KeyError (W4-4 同模式扩散修)
from runtime.gateway import platforms # noqa: F401, E402
+from runtime.gateway.base import REGISTRY, Platform, get_platform, register # noqa: F401
diff --git a/runtime/gateway/platforms/telegram.py b/runtime/gateway/platforms/telegram.py
index 5db18ac..35f3dfe 100644
--- a/runtime/gateway/platforms/telegram.py
+++ b/runtime/gateway/platforms/telegram.py
@@ -4,8 +4,6 @@
import os
-from loguru import logger
-
from runtime.gateway.base import DeliveryResult, Message, Platform, register
diff --git a/runtime/healthcheck/agent_smoke.py b/runtime/healthcheck/agent_smoke.py
index 4edabdd..7d05aa5 100644
--- a/runtime/healthcheck/agent_smoke.py
+++ b/runtime/healthcheck/agent_smoke.py
@@ -1,8 +1,8 @@
"""L1 frontmatter lint · 无 LLM · pre-push / pre-commit / doctor 共用.
校验:
-- 02-专家定义/[0-9]*.md 16 个文件 frontmatter `name`/`description`/`tools` 必填
-- 03-技能定义/*.md(排除 README/INDEX/上游 darwin-skill/karpathy-guidelines)`name`/`description` 必填
+- agents/[0-9]*.md 16 个文件 frontmatter `name`/`description`/`tools` 必填
+- skills/*.md(排除 README/INDEX/上游 darwin-skill/karpathy-guidelines)`name`/`description` 必填
- registry.build_catalog() 加载后 16 expert 全在,且 name 字段与 file slug 协同(只看 frontmatter name)
- 所有 agent 文件名形如 `NN-中文.md`(NN 两位数 01-16),序号连续无跳
diff --git a/runtime/healthcheck/llm_probe.py b/runtime/healthcheck/llm_probe.py
index cfbf89e..4745753 100644
--- a/runtime/healthcheck/llm_probe.py
+++ b/runtime/healthcheck/llm_probe.py
@@ -12,7 +12,6 @@
from runtime.registry.registry import build_catalog
from runtime.subagent.aux_client import aux_client
-
SMOKE_PROMPT = "用一句话(≤30 字)用中文描述你这个测试专家的核心职责。不要任何前置废话。"
diff --git a/runtime/healthcheck/llm_smoke.py b/runtime/healthcheck/llm_smoke.py
index f6f4e47..2b05b09 100644
--- a/runtime/healthcheck/llm_smoke.py
+++ b/runtime/healthcheck/llm_smoke.py
@@ -15,7 +15,6 @@
from runtime.config.settings import get_settings
from runtime.router.llm_client import PROVIDER_MODEL_MAP
-
SMOKE_SYSTEM = "You are a translation helper. Reply with ONLY the translated text, no extra words."
SMOKE_USER = "Translate to Chinese: Hello"
diff --git a/runtime/init/INDEX.md b/runtime/init/INDEX.md
index cbfb550..28323a1 100644
--- a/runtime/init/INDEX.md
+++ b/runtime/init/INDEX.md
@@ -6,7 +6,7 @@
| 文件 | 用途 |
|------|------|
-| `matrix.py` | 加载 `04-配置文件/templates/matrix.yaml`(单源真理) |
+| `matrix.py` | 加载 `config/templates/matrix.yaml`(单源真理) |
| `wizard.py` | 交互向导 + `from_args()` 非交互 + `from_preset()` 预设 |
| `renderer.py` | 把 `InitAnswers` + matrix + 模板 → `.env` + `tagent.yml` + `STARTUP.md` |
@@ -42,7 +42,7 @@ tagent init --overwrite
- 新 LLM provider → `llm_providers:` 加节
- 新 BugTracker → `bug_trackers:` 加节(主宪章 §37 6 adapter 之外加)
- 新通知渠道 → `notifiers:` 加节(主宪章 §36 6 渠道之外加)
-- 新测试类型 → `test_types:` 加节 + 同步 `02-专家定义/` 加平台 expert(如需)
+- 新测试类型 → `test_types:` 加节 + 同步 `agents/` 加平台 expert(如需)
## 矩阵规模
@@ -51,5 +51,5 @@ tagent init --overwrite
## 相关
- 主宪章 §1(同步铁律)+ §5(多格式 I/O)+ §7(一键部署)+ §36(多端)+ §37(BugTracker)
-- 模板:[`04-配置文件/templates/`](../../04-配置文件/templates/INDEX.md)
+- 模板:[`config/templates/`](../../config/templates/INDEX.md)
- 集成 CLI:`runtime/cli/main.py` `init` 子命令
diff --git a/runtime/init/__init__.py b/runtime/init/__init__.py
index 8105d16..f613d5e 100644
--- a/runtime/init/__init__.py
+++ b/runtime/init/__init__.py
@@ -1,6 +1,6 @@
"""tagent init · 配置自动组装(V1.12.0).
-读 `04-配置文件/templates/matrix.yaml` 矩阵 + base.*.tpl 模板,产 `.env` + `tagent.yml` + `STARTUP.md`。
+读 `config/templates/matrix.yaml` 矩阵 + base.*.tpl 模板,产 `.env` + `tagent.yml` + `STARTUP.md`。
矩阵 8 测试类型 × 6 平台 × 5 LLM × 6 BugTracker × 6 通知 = 8640 组合,wizard 自动列出。
主入口:
diff --git a/runtime/init/matrix.py b/runtime/init/matrix.py
index 06267d7..41f3a92 100644
--- a/runtime/init/matrix.py
+++ b/runtime/init/matrix.py
@@ -60,7 +60,7 @@ class Matrix:
def _matrix_path() -> Path:
from runtime.config.settings import get_settings
- return get_settings().project_root / "04-配置文件" / "templates" / "matrix.yaml"
+ return get_settings().project_root / "config" / "templates" / "matrix.yaml"
def load_matrix(path: Path | None = None) -> Matrix:
diff --git a/runtime/init/renderer.py b/runtime/init/renderer.py
index fdbe00c..b6e5026 100644
--- a/runtime/init/renderer.py
+++ b/runtime/init/renderer.py
@@ -13,7 +13,7 @@
def _templates_dir() -> Path:
from runtime.config.settings import get_settings
- return get_settings().project_root / "04-配置文件" / "templates"
+ return get_settings().project_root / "config" / "templates"
def _read_version() -> str:
@@ -103,6 +103,10 @@ def _build_tpl_vars(ans: InitAnswers, m: Matrix) -> dict[str, str]:
"REQUIRED_FILLS_BLOCK": required_fills,
"PLATFORM_DEPS_HINT": platform_deps_hint,
"SAMPLE_TARGET": sample_target,
+ # infra defaults (dev only — user must change for production)
+ "DB_URL": "postgresql+psycopg://tagent:tagent@localhost:5432/tagent",
+ "MINIO_ACCESS_KEY": "minioadmin",
+ "MINIO_SECRET_KEY": "minioadmin",
}
@@ -124,7 +128,7 @@ def _required_hint(key: str, ans: InitAnswers, m: Matrix) -> str:
"TARGET_URL": "渗透目标 URL",
"SCAN_PROFILE": "quick / full / stealth",
}
- return hints.get(key, "见 04-配置文件/INDEX.md")
+ return hints.get(key, "见 config/INDEX.md")
def _apply(tpl: str, vars_: dict[str, str]) -> str:
diff --git a/runtime/intelligence/canary_config.py b/runtime/intelligence/canary_config.py
index 68d43b3..060c88f 100644
--- a/runtime/intelligence/canary_config.py
+++ b/runtime/intelligence/canary_config.py
@@ -16,10 +16,8 @@
import json
import math
-import time
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any
import yaml
diff --git a/runtime/intelligence/data_lifecycle.py b/runtime/intelligence/data_lifecycle.py
index d6e2693..859fee6 100644
--- a/runtime/intelligence/data_lifecycle.py
+++ b/runtime/intelligence/data_lifecycle.py
@@ -11,7 +11,6 @@
import hashlib
import json
-import os
import time
from collections import defaultdict
from dataclasses import dataclass, field
diff --git a/runtime/intelligence/flaky_analyzer.py b/runtime/intelligence/flaky_analyzer.py
index 0a3e412..d1f99d5 100644
--- a/runtime/intelligence/flaky_analyzer.py
+++ b/runtime/intelligence/flaky_analyzer.py
@@ -10,13 +10,12 @@
from __future__ import annotations
+import contextlib
import json
import re
-import subprocess
import time
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any
@dataclass
@@ -82,10 +81,8 @@ def _parse_log_line(line: str, source: str) -> LogEntry | None:
ts_match = re.match(r'(\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2})', line)
ts = time.time()
if ts_match:
- try:
- ts = time.mktime(time.strptime(ts_match.group(1)[:19], "%Y-%m-%dT%H:%M:%S"))
- except ValueError:
- pass
+ with contextlib.suppress(ValueError):
+ ts = time.mktime(time.strptime(ts_match.group(1)[:19].replace(" ", "T"), "%Y-%m-%dT%H:%M:%S"))
level = "INFO"
for lv in ["ERROR", "CRITICAL", "WARN", "WARNING", "INFO", "DEBUG"]:
@@ -105,11 +102,11 @@ def diagnose_heuristic(test_name: str, logs: list[LogEntry],
"""Heuristic root cause analysis (no LLM required).
Production path should use diagnose_with_llm()."""
- errors = [l for l in logs if l.level in ("ERROR", "CRITICAL")]
- test_errors = [l for l in errors if l.source == "test"]
- sut_errors = [l for l in errors if l.source == "sut"]
- db_errors = [l for l in errors if l.source == "db"]
- network_errors = [l for l in errors if l.source == "network"]
+ errors = [e for e in logs if e.level in ("ERROR", "CRITICAL")]
+ test_errors = [e for e in errors if e.source == "test"]
+ sut_errors = [e for e in errors if e.source == "sut"]
+ db_errors = [e for e in errors if e.source == "db"]
+ network_errors = [e for e in errors if e.source == "network"]
# Rule 1: Only test errors → test logic issue
if test_errors and not sut_errors and not db_errors and not network_errors:
@@ -260,14 +257,12 @@ def _parse_llm_response(raw: str, test_name: str) -> DiagnosisResult:
logs_match = re.search(r'==Most Relevant Log Lines==\s*\n(.+?)(?=\n==|$)', raw, re.DOTALL)
if logs_match:
- log_lines = [l.strip("- ") for l in logs_match.group(1).strip().split("\n") if l.strip()]
+ log_lines = [line.strip("- ") for line in logs_match.group(1).strip().split("\n") if line.strip()]
conf_match = re.search(r'==Confidence==\s*\n([\d.]+)', raw)
if conf_match:
- try:
+ with contextlib.suppress(ValueError):
confidence = float(conf_match.group(1))
- except ValueError:
- pass
return DiagnosisResult(
conclusion=conclusion or f"Analysis for {test_name}",
diff --git a/runtime/intelligence/impact_analyzer.py b/runtime/intelligence/impact_analyzer.py
index 89ce7ac..87b8c20 100644
--- a/runtime/intelligence/impact_analyzer.py
+++ b/runtime/intelligence/impact_analyzer.py
@@ -8,7 +8,6 @@
import ast
import subprocess
from pathlib import Path
-from typing import Dict, List, Optional, Set
class ImportGraph:
@@ -16,8 +15,8 @@ class ImportGraph:
def __init__(self, root: str | Path):
self.root = Path(root)
- self._imports: Dict[str, Set[str]] = {} # module → {modules it imports}
- self._imported_by: Dict[str, Set[str]] = {} # module → {modules that import it}
+ self._imports: dict[str, set[str]] = {} # module → {modules it imports}
+ self._imported_by: dict[str, set[str]] = {} # module → {modules that import it}
def scan(self, max_files: int = 500) -> int:
"""Scan all .py files under root, build bidirectional import graph."""
@@ -39,35 +38,34 @@ def scan(self, max_files: int = 500) -> int:
imported = alias.name.split(".")[0]
self._imports[module].add(imported)
self._imported_by.setdefault(imported, set()).add(module)
- elif isinstance(node, ast.ImportFrom):
- if node.module:
- imported = node.module.split(".")[0]
- self._imports[module].add(imported)
- self._imported_by.setdefault(imported, set()).add(module)
+ elif isinstance(node, ast.ImportFrom) and node.module:
+ imported = node.module.split(".")[0]
+ self._imports[module].add(imported)
+ self._imported_by.setdefault(imported, set()).add(module)
count += 1
return count
- def affected_modules(self, changed_files: List[str]) -> Set[str]:
+ def affected_modules(self, changed_files: list[str]) -> set[str]:
"""Given a list of changed file paths, return all modules potentially affected.
Includes:
- The changed modules themselves
- Any module that imports them (1‑hop downstream)
"""
- changed_modules: Set[str] = set()
+ changed_modules: set[str] = set()
for cf in changed_files:
m = _path_to_module(Path(cf), self.root)
if m:
changed_modules.add(m)
- affected: Set[str] = set(changed_modules)
+ affected: set[str] = set(changed_modules)
for m in changed_modules:
downstream = self._imported_by.get(m, set())
affected.update(downstream)
return affected
- def affected_tests(self, changed_files: List[str], test_dirs: List[str] | None = None) -> List[str]:
+ def affected_tests(self, changed_files: list[str], test_dirs: list[str] | None = None) -> list[str]:
"""Find test files most likely impacted by changed_files.
Returns sorted list of test file paths.
@@ -78,7 +76,7 @@ def affected_tests(self, changed_files: List[str], test_dirs: List[str] | None =
affected = self.affected_modules(changed_files)
# Find test files that import affected modules or are in test dirs
- candidates: List[str] = []
+ candidates: list[str] = []
for f in self.root.rglob("test_*.py"):
if ".venv" in f.parts or "__pycache__" in f.parts:
continue
@@ -92,10 +90,9 @@ def affected_tests(self, changed_files: List[str], test_dirs: List[str] | None =
if alias.name.split(".")[0] in affected:
candidates.append(str(f.relative_to(self.root)))
break
- elif isinstance(node, ast.ImportFrom):
- if node.module and node.module.split(".")[0] in affected:
- candidates.append(str(f.relative_to(self.root)))
- break
+ elif isinstance(node, ast.ImportFrom) and node.module and node.module.split(".")[0] in affected:
+ candidates.append(str(f.relative_to(self.root)))
+ break
# Also include any test_*.py in test directories
for td in test_dirs:
@@ -124,8 +121,8 @@ def _path_to_module(p: Path, root: Path) -> str:
def analyze_impact(
project_root: str | Path,
base_branch: str = "main",
- test_dirs: Optional[List[str]] = None,
-) -> Dict:
+ test_dirs: list[str] | None = None,
+) -> dict:
"""Main entry point: git diff → import graph → impacted test list.
Returns:
@@ -143,7 +140,7 @@ def analyze_impact(
raise FileNotFoundError(f"project root not found: {root}")
# git diff
- changed_files: List[str] = []
+ changed_files: list[str] = []
try:
result = subprocess.run(
["git", "-C", str(root), "diff", "--name-only", f"{base_branch}...HEAD"],
@@ -151,7 +148,7 @@ def analyze_impact(
)
changed_files = [f.strip() for f in result.stdout.strip().split("\n") if f.strip()]
except Exception as e:
- raise RuntimeError(f"git diff failed: {e}")
+ raise RuntimeError(f"git diff failed: {e}") from e
if not changed_files:
return {
@@ -203,7 +200,7 @@ def _cli() -> None:
report = analyze_impact(Path(args.root), base_branch=args.base)
- if getattr(args, "json"):
+ if args.json:
print(_json.dumps(report, indent=2, ensure_ascii=False))
else:
print(f"Changed files: {len(report['changed_files'])}")
diff --git a/runtime/intelligence/journey_mapper.py b/runtime/intelligence/journey_mapper.py
index 14ee19d..76e8eb7 100644
--- a/runtime/intelligence/journey_mapper.py
+++ b/runtime/intelligence/journey_mapper.py
@@ -8,12 +8,11 @@
import json
import logging
from pathlib import Path
-from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
# Default journey → module mapping (extend via workspace/journey_map.json)
-DEFAULT_JOURNEYS: Dict[str, List[str]] = {
+DEFAULT_JOURNEYS: dict[str, list[str]] = {
"Registration": ["auth/register", "signup", "user/create"],
"Login": ["auth/login", "session", "login"],
"Payment": ["payment/", "order/", "checkout", "billing"],
@@ -25,7 +24,7 @@
}
-def load_journey_map(source: Optional[str | Path] = None) -> Dict[str, List[str]]:
+def load_journey_map(source: str | Path | None = None) -> dict[str, list[str]]:
"""Load journey map from JSON file, or use defaults."""
if source:
p = Path(source)
@@ -36,9 +35,9 @@ def load_journey_map(source: Optional[str | Path] = None) -> Dict[str, List[str]
def map_failures_to_journeys(
- failures: List[Dict],
- journey_map: Optional[Dict[str, List[str]]] = None,
-) -> Dict[str, List[Dict]]:
+ failures: list[dict],
+ journey_map: dict[str, list[str]] | None = None,
+) -> dict[str, list[dict]]:
"""Given a list of {name, ...} failures, return journeys → affected failures.
Args:
@@ -50,7 +49,7 @@ def map_failures_to_journeys(
if journey_map is None:
journey_map = load_journey_map()
- impacted: Dict[str, List[Dict]] = {}
+ impacted: dict[str, list[dict]] = {}
unmatched = list(failures)
for journey, patterns in journey_map.items():
@@ -72,9 +71,9 @@ def map_failures_to_journeys(
def journey_impact_report(
- failures: List[Dict],
- journey_map: Optional[Dict[str, List[str]]] = None,
-) -> Dict:
+ failures: list[dict],
+ journey_map: dict[str, list[str]] | None = None,
+) -> dict:
"""Generate full journey impact report.
Returns:
@@ -104,7 +103,7 @@ def journey_impact_report(
}
-def to_markdown(report: Dict) -> str:
+def to_markdown(report: dict) -> str:
lines = [
"# Journey Impact Report",
"",
@@ -155,6 +154,5 @@ def _cli() -> None:
# Late import for CLI
from runtime.config.settings import get_settings # noqa: E402
-
if __name__ == "__main__":
_cli()
diff --git a/runtime/intelligence/risk_matrix.py b/runtime/intelligence/risk_matrix.py
index faf2565..93a7e53 100644
--- a/runtime/intelligence/risk_matrix.py
+++ b/runtime/intelligence/risk_matrix.py
@@ -8,7 +8,6 @@
import json
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Dict, List
@dataclass
@@ -18,7 +17,7 @@ class RiskItem:
probability: float # 0.0 – 1.0 (calibrated)
impact: float # 0.0 – 1.0 (calibrated)
category: str = "functional"
- mitigations: List[str] = field(default_factory=list)
+ mitigations: list[str] = field(default_factory=list)
residual_probability: float | None = None
residual_impact: float | None = None
@@ -47,7 +46,7 @@ def level(self) -> str:
@dataclass
class RiskMatrix:
- items: List[RiskItem] = field(default_factory=list)
+ items: list[RiskItem] = field(default_factory=list)
def add(self, item: RiskItem) -> None:
self.items.append(item)
@@ -59,7 +58,7 @@ def calibrate(self, historical_fail_rate: float = 0.05) -> None:
n = 3 # effective sample size
item.probability = round((item.probability * n + historical_fail_rate) / (n + 1), 3)
- def mitigate(self, item_id: str, residual_prob: float, residual_impact: float, mitigations: List[str]) -> None:
+ def mitigate(self, item_id: str, residual_prob: float, residual_impact: float, mitigations: list[str]) -> None:
for item in self.items:
if item.id == item_id:
item.residual_probability = residual_prob
@@ -68,7 +67,7 @@ def mitigate(self, item_id: str, residual_prob: float, residual_impact: float, m
return
raise KeyError(f"risk item '{item_id}' not found")
- def summary(self) -> Dict:
+ def summary(self) -> dict:
levels = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0}
for item in self.items:
levels[item.level] += 1
diff --git a/runtime/intelligence/test_prioritizer.py b/runtime/intelligence/test_prioritizer.py
index f89082e..ab0e7fa 100644
--- a/runtime/intelligence/test_prioritizer.py
+++ b/runtime/intelligence/test_prioritizer.py
@@ -16,13 +16,11 @@
import json
import math
-import os
import subprocess
import time
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any
@dataclass
diff --git a/runtime/learning_loop/INDEX.md b/runtime/learning_loop/INDEX.md
index 06a01fe..0077d45 100644
--- a/runtime/learning_loop/INDEX.md
+++ b/runtime/learning_loop/INDEX.md
@@ -5,7 +5,7 @@
## 不变量(与 hermes 同源)
-- **只动 agent-created skill**(不动 02-专家定义/03-技能定义已有)
+- **只动 agent-created skill**(不动 `agents/`、`skills/` 下已有内容)
- **绝不自动删,只归档**(`workspace/learning/archive/`)
- **Pinned skill 绕过所有自动**
- **用 auxiliary client**(`runtime/subagent/aux_client`)
diff --git a/runtime/learning_loop/session_search.py b/runtime/learning_loop/session_search.py
index cb76f10..6222666 100644
--- a/runtime/learning_loop/session_search.py
+++ b/runtime/learning_loop/session_search.py
@@ -5,13 +5,10 @@
from __future__ import annotations
-import json
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
-from loguru import logger
-
from runtime.config.settings import get_settings
diff --git a/runtime/marketplace/catalog.py b/runtime/marketplace/catalog.py
index 9904754..ab87c0c 100644
--- a/runtime/marketplace/catalog.py
+++ b/runtime/marketplace/catalog.py
@@ -46,8 +46,11 @@ def load_local() -> list[Entry]:
return []
out: list[Entry] = []
for e in data.get("entries", []):
+ url = e.get("source_url", "")
+ if url and not url.startswith("https://"):
+ logger.warning("marketplace entry {} has non-https source_url: {}", e.get("name", "?"), url)
out.append(Entry(
- name=e["name"], version=e["version"], lane=e["lane"], source_url=e["source_url"],
+ name=e["name"], version=e["version"], lane=e["lane"], source_url=url,
sha256=e.get("sha256", ""), signature=e.get("signature", ""), license=e.get("license", ""),
safety_score=int(e.get("safety_score", 0)), confidence=e.get("confidence", "llm-draft-unreviewed"),
source_tier=e.get("source_tier", "low"), installed_at=e.get("installed_at"),
diff --git a/runtime/marketplace/discovery.py b/runtime/marketplace/discovery.py
index 64b8b17..1436dd1 100644
--- a/runtime/marketplace/discovery.py
+++ b/runtime/marketplace/discovery.py
@@ -13,19 +13,18 @@
import logging
from importlib.metadata import EntryPoint, entry_points
-from typing import Dict, List
logger = logging.getLogger(__name__)
PLUGIN_GROUP = "tagent"
-def discover_plugins() -> Dict[str, List[EntryPoint]]:
+def discover_plugins() -> dict[str, list[EntryPoint]]:
"""Discover all registered tagent plugins grouped by type.
Returns: {"agents": [...], "skills": [...], "backends": [...]}
"""
- discovered: Dict[str, List[EntryPoint]] = {"agents": [], "skills": [], "backends": []}
+ discovered: dict[str, list[EntryPoint]] = {"agents": [], "skills": [], "backends": []}
try:
eps = entry_points(group=PLUGIN_GROUP)
for ep in eps:
@@ -47,9 +46,9 @@ def discover_plugins() -> Dict[str, List[EntryPoint]]:
return discovered
-def list_plugins() -> List[Dict[str, str]]:
+def list_plugins() -> list[dict[str, str]]:
"""Flat list of all discovered plugins."""
- plugins: List[Dict[str, str]] = []
+ plugins: list[dict[str, str]] = []
for kind, eps in discover_plugins().items():
for ep in eps:
plugins.append({
diff --git a/runtime/marketplace/installer.py b/runtime/marketplace/installer.py
index 113a2b6..2981650 100644
--- a/runtime/marketplace/installer.py
+++ b/runtime/marketplace/installer.py
@@ -14,10 +14,10 @@
from loguru import logger
-from runtime.config.settings import get_settings
from runtime.config.safety import SafeByDefaultBlocked, is_allowed
+from runtime.config.settings import get_settings
from runtime.marketplace.catalog import Entry, find, load_local, save_local
-from runtime.marketplace.verifier import GateResult, run_all_gates
+from runtime.marketplace.verifier import run_all_gates
def _market_dir() -> Path:
diff --git a/runtime/marketplace/verifier.py b/runtime/marketplace/verifier.py
index 7869936..eada6cb 100644
--- a/runtime/marketplace/verifier.py
+++ b/runtime/marketplace/verifier.py
@@ -14,8 +14,6 @@
from dataclasses import dataclass
from pathlib import Path
-from loguru import logger
-
@dataclass(slots=True)
class GateResult:
diff --git a/runtime/mcp/INDEX.md b/runtime/mcp/INDEX.md
index 0585810..480bee8 100644
--- a/runtime/mcp/INDEX.md
+++ b/runtime/mcp/INDEX.md
@@ -1,7 +1,7 @@
# mcp 索引
> 主宪章 §16 预留 6 件套,V1.2.0(M2)实现。
-> 当前 `04-配置文件/.mcp.json` 仅启用 filesystem;本目录服务通过 `04-配置文件/.mcp.json` 启用。
+> 当前 `config/.mcp.json` 仅启用 filesystem;本目录服务通过 `config/.mcp.json` 启用。
## 模块清单
@@ -23,7 +23,7 @@ python -m runtime.mcp.test_orchestrator.server # stdio mode
python -m runtime.mcp.test_orchestrator.server --http 8801 # http mode
```
-或注册到 `04-配置文件/.mcp.json`:
+或注册到 `config/.mcp.json`:
```json
{
diff --git a/runtime/mcp/__init__.py b/runtime/mcp/__init__.py
index 5432dca..727f62e 100644
--- a/runtime/mcp/__init__.py
+++ b/runtime/mcp/__init__.py
@@ -9,7 +9,7 @@
- compliance-checker: 行业合规规则库(SOC2/PCI/HIPAA/IEC 62304 等)
All servers respect:
- - 主宪章 §9: 已有不动 → 仅包装,不修改 16 专家/32 skill/49 脚本
+ - 主宪章 §9: 已有不动 → 仅包装,不修改 16 专家/32 skill/67 脚本
- 主宪章 §21 横切: 失败可复现(seed+snapshot+录屏),不入回归库否
- 主宪章 §18-12: 决策可追溯 → 工具调用落 decisions/
"""
diff --git a/runtime/mcp/base.py b/runtime/mcp/base.py
index f2297aa..de1fb41 100644
--- a/runtime/mcp/base.py
+++ b/runtime/mcp/base.py
@@ -3,7 +3,7 @@
Honors charter:
- §18-12 决策可追溯:工具调用自动落 `decisions/{date}_mcp_{tool}_{run_id}.json`
- §21 横切可复现性:run_id 注入 + seed 记录 + 失败 snapshot
- - §1 同步铁律:服务列表必须与 `04-配置文件/.mcp.json` 一致
+ - §1 同步铁律:服务列表必须与 `config/.mcp.json` 一致
"""
from __future__ import annotations
@@ -12,9 +12,10 @@
import json
import os
import uuid
+from collections.abc import Awaitable, Callable
from datetime import datetime, timezone
from pathlib import Path
-from typing import Any, Awaitable, Callable
+from typing import Any
from loguru import logger
@@ -102,9 +103,9 @@ def make_server(name: str, version: str = "0.1.0"):
async def run_stdio(server) -> None:
"""Run an MCP server over stdio."""
try:
- from mcp.server.stdio import stdio_server
- from mcp.server.models import InitializationOptions
from mcp.server import NotificationOptions
+ from mcp.server.models import InitializationOptions
+ from mcp.server.stdio import stdio_server
except ImportError as e:
raise RuntimeError("mcp SDK missing components") from e
async with stdio_server() as (read, write):
diff --git a/runtime/mcp/protocol_adapter/adapters.py b/runtime/mcp/protocol_adapter/adapters.py
index b7b9f50..a92414e 100644
--- a/runtime/mcp/protocol_adapter/adapters.py
+++ b/runtime/mcp/protocol_adapter/adapters.py
@@ -4,9 +4,6 @@
import json
import time
-from typing import Any
-
-from loguru import logger
from runtime.mcp.protocol_adapter.base import ProtocolAdapter, ProtocolResult, register
diff --git a/runtime/mcp/protocol_adapter/server.py b/runtime/mcp/protocol_adapter/server.py
index c2cd194..2620b79 100644
--- a/runtime/mcp/protocol_adapter/server.py
+++ b/runtime/mcp/protocol_adapter/server.py
@@ -13,10 +13,10 @@
from loguru import logger
from runtime.mcp.base import make_server, run_stdio, tool_decision_logged
-from runtime.mcp.protocol_adapter.base import REGISTRY, get_adapter
# trigger adapter registration
from runtime.mcp.protocol_adapter import adapters # noqa: F401
+from runtime.mcp.protocol_adapter.base import REGISTRY, get_adapter
@tool_decision_logged("list_protocols")
@@ -33,7 +33,7 @@ async def tool_ping(protocol: str, target: str, payload: Any = "ping", timeout:
"target": target,
"ok": result.ok,
"elapsed_ms": result.elapsed_ms,
- "payload": result.payload if isinstance(result.payload, (str, dict, type(None))) else str(result.payload),
+ "payload": result.payload if isinstance(result.payload, str | dict | None) else str(result.payload),
"error": result.error,
"meta": result.meta,
}
diff --git a/runtime/mcp/test_orchestrator/server.py b/runtime/mcp/test_orchestrator/server.py
index 6204c4b..7029d45 100644
--- a/runtime/mcp/test_orchestrator/server.py
+++ b/runtime/mcp/test_orchestrator/server.py
@@ -12,6 +12,7 @@
import asyncio
import json
+from collections import OrderedDict
from typing import Any
from loguru import logger
@@ -25,14 +26,12 @@
# Charter §21 横切预算: 防 server 长时跑无限增长.
# Production should rely on Postgres `runs` table; this is the fast path.
_MAX_RUN_RESULTS = 1024
-_run_results: "OrderedDict[str, dict]" = None # type: ignore[assignment]
+_run_results: OrderedDict[str, dict] = None # type: ignore[assignment]
def _results_dict():
global _run_results
if _run_results is None:
- from collections import OrderedDict
-
_run_results = OrderedDict()
return _run_results
@@ -147,7 +146,7 @@ def build_server():
TOOLS = [
Tool(
name="catalog",
- description="List 16 experts + 32 skills loaded from 02-专家定义/* + 03-技能定义/*.",
+ description="List 16 experts + 32 skills loaded from agents/* + skills/*.",
inputSchema={"type": "object", "properties": {}, "additionalProperties": False},
),
Tool(
diff --git a/runtime/observability/apm_export.py b/runtime/observability/apm_export.py
index 11adc99..e2bb641 100644
--- a/runtime/observability/apm_export.py
+++ b/runtime/observability/apm_export.py
@@ -6,10 +6,10 @@
import logging
from datetime import datetime, timezone
from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any
-from runtime.observability.dashboard import build_dashboard
from runtime.config.settings import get_settings
+from runtime.observability.dashboard import build_dashboard
logger = logging.getLogger(__name__)
@@ -17,7 +17,7 @@
def export_datadog_dashboard(
output: str | Path,
title: str = "Test-Agent Quality",
- workspace_dir: Optional[Path] = None,
+ workspace_dir: Path | None = None,
) -> Path:
"""Generate a Datadog dashboard JSON with test quality widgets."""
ws = workspace_dir or get_settings().workspace_dir
@@ -25,7 +25,7 @@ def export_datadog_dashboard(
decision = data.get("decision", {})
diagnostic = data.get("diagnostic", {})
- dashboard: Dict[str, Any] = {
+ dashboard: dict[str, Any] = {
"title": title,
"description": "Auto‑generated by Test-Agent",
"layout_type": "ordered",
@@ -41,7 +41,7 @@ def export_datadog_dashboard(
"definition": {
"type": "query_value",
"title": "MTTD (min)",
- "requests': [{'q": f"avg:test.mttd_minutes{{{decision.get('mttd_minutes', 0)}}}", "aggregator": "avg"}],
+ "requests": [{"q": f"avg:test.mttd_minutes{{{decision.get('mttd_minutes', 0)}}}", "aggregator": "avg"}],
}
},
{
@@ -81,15 +81,13 @@ def export_datadog_dashboard(
def export_grafana_dashboard(
output: str | Path,
title: str = "Test-Agent Quality",
- workspace_dir: Optional[Path] = None,
+ workspace_dir: Path | None = None,
) -> Path:
"""Generate a Grafana dashboard JSON with test quality panels."""
ws = workspace_dir or get_settings().workspace_dir
data = build_dashboard(ws)
decision = data.get("decision", {})
- diagnostic = data.get("diagnostic", {})
- now = datetime.now(timezone.utc).isoformat()
dashboard = {
"dashboard": {
@@ -101,14 +101,14 @@ def export_grafana_dashboard(
"title": "Pass Rate",
"type": "stat",
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 0},
- "targets": [{"expr": f"test_pass_rate{{app=\"tagent\"}}", "legendFormat": "pass"}],
+ "targets": [{"expr": "test_pass_rate{app=\"tagent\"}", "legendFormat": "pass"}],
"fieldConfig": {"defaults": {"thresholds": {"steps": [{"value": None, "color": "red"}, {"value": 60, "color": "yellow"}, {"value": 85, "color": "green"}]}}},
},
{
"title": "Trend",
"type": "stat",
"gridPos": {"h": 4, "w": 6, "x": 6, "y": 0},
- "targets": [{"expr": f"test_trend{{app=\"tagent\"}}", "legendFormat": decision.get("trend", "stable")}],
+ "targets": [{"expr": "test_trend{app=\"tagent\"}", "legendFormat": decision.get("trend", "stable")}],
},
{
"title": "Expert Failures (Top 10)",
@@ -120,7 +120,7 @@ def export_grafana_dashboard(
"title": "Flaky Candidates",
"type": "table",
"gridPos": {"h": 6, "w": 12, "x": 0, "y": 12},
- "targets": [{"expr": f"test_flaky_candidates{{app=\"tagent\"}}", "format": "table"}],
+ "targets": [{"expr": "test_flaky_candidates{app=\"tagent\"}", "format": "table"}],
},
],
"time": {"from": "now-7d", "to": "now"},
diff --git a/runtime/observability/audit.py b/runtime/observability/audit.py
index c453e5c..e62ec0d 100644
--- a/runtime/observability/audit.py
+++ b/runtime/observability/audit.py
@@ -10,7 +10,7 @@
import threading
from datetime import datetime, timezone
from pathlib import Path
-from typing import Any, Optional
+from typing import Any
from loguru import logger
@@ -30,7 +30,7 @@ def log_event(
resource: str = "",
resource_id: str = "",
actor: str = "",
- details: Optional[dict[str, Any]] = None,
+ details: dict[str, Any] | None = None,
outcome: str = "success",
) -> None:
"""Append one audit event to today's JSONL file. Thread‑safe.
@@ -61,10 +61,10 @@ def log_event(
def query_events(
- action: Optional[str] = None,
- resource: Optional[str] = None,
- resource_id: Optional[str] = None,
- actor: Optional[str] = None,
+ action: str | None = None,
+ resource: str | None = None,
+ resource_id: str | None = None,
+ actor: str | None = None,
limit: int = 100,
since_days: int = 7,
) -> list[dict[str, Any]]:
diff --git a/runtime/observability/dashboard.py b/runtime/observability/dashboard.py
index 2a80847..1afa3cc 100644
--- a/runtime/observability/dashboard.py
+++ b/runtime/observability/dashboard.py
@@ -6,7 +6,6 @@
from __future__ import annotations
import json
-from datetime import datetime, timezone
from pathlib import Path
from typing import Any
@@ -52,10 +51,7 @@ def build_decision_signal(runs: list[dict[str, Any]]) -> dict[str, Any]:
# MTTD/MTTR estimates from run durations
durations = [r.get("duration_ms", r.get("elapsed_ms", 0)) for r in runs if r.get("duration_ms") or r.get("elapsed_ms")]
- if durations:
- avg_dur = sum(durations) / len(durations) / 1000 / 60 # minutes
- else:
- avg_dur = 0
+ avg_dur = sum(durations) / len(durations) / 1000 / 60 if durations else 0 # minutes
return {
"pass_rate_pct": avg_pass,
@@ -142,7 +138,3 @@ def build_dashboard(workspace_dir: Path) -> dict[str, Any]:
actions = build_action_items(runs, diagnostic["expert_heatmap"])
total = len(runs)
- pass_rates = [
- (r.get("succeeded", r.get("passed", 0)) / max(r.get("total", 1), 1))
- for r in runs
- ]
diff --git a/runtime/observability/dora_tracker.py b/runtime/observability/dora_tracker.py
index 26373fe..5eaa1b5 100644
--- a/runtime/observability/dora_tracker.py
+++ b/runtime/observability/dora_tracker.py
@@ -17,7 +17,7 @@
import threading
import time
from collections import defaultdict
-from dataclasses import dataclass, field
+from dataclasses import dataclass
from pathlib import Path
from typing import Any
diff --git a/runtime/observability/otel.py b/runtime/observability/otel.py
index c071440..45235d8 100644
--- a/runtime/observability/otel.py
+++ b/runtime/observability/otel.py
@@ -2,8 +2,8 @@
from __future__ import annotations
+from collections.abc import Iterator
from contextlib import contextmanager
-from typing import Iterator
from loguru import logger
diff --git a/runtime/observability/prometheus_metrics.py b/runtime/observability/prometheus_metrics.py
index 010e560..d9667ca 100644
--- a/runtime/observability/prometheus_metrics.py
+++ b/runtime/observability/prometheus_metrics.py
@@ -13,9 +13,7 @@
from __future__ import annotations
import threading
-import time
-from collections import defaultdict
-from typing import Any
+from collections import defaultdict, deque
class MetricsRegistry:
@@ -31,8 +29,9 @@ def __init__(self) -> None:
self.circuit_broken: int = 0
self.last_pass_rate: float = 0.0
# Histogram buckets (seconds): 0.1, 0.5, 1, 5, 10, 30, 60, 120, 300, 600
- self.run_durations: list[float] = []
- self.llm_call_durations: list[float] = []
+ self._MAX_HISTOGRAM_SAMPLES = 1000
+ self.run_durations: deque[float] = deque(maxlen=self._MAX_HISTOGRAM_SAMPLES)
+ self.llm_call_durations: deque[float] = deque(maxlen=self._MAX_HISTOGRAM_SAMPLES)
self.HISTOGRAM_BUCKETS = [0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0, 600.0]
def inc_runs(self) -> None:
@@ -47,14 +46,10 @@ def dec_active(self) -> None:
def record_run_duration(self, seconds: float) -> None:
with self._lock:
self.run_durations.append(seconds)
- if len(self.run_durations) > 1000:
- self.run_durations = self.run_durations[-500:]
def record_llm_duration(self, seconds: float) -> None:
with self._lock:
self.llm_call_durations.append(seconds)
- if len(self.llm_call_durations) > 1000:
- self.llm_call_durations = self.llm_call_durations[-500:]
def inc_agent_error(self, agent_name: str) -> None:
with self._lock:
@@ -68,7 +63,7 @@ def set_circuit(self, broken: bool) -> None:
with self._lock:
self.circuit_broken = 1 if broken else 0
- def _bucket_counts(self, values: list[float]) -> dict[float, int]:
+ def _bucket_counts(self, values: deque[float]) -> dict[float, int]:
counts: dict[float, int] = {}
for b in self.HISTOGRAM_BUCKETS:
counts[b] = sum(1 for v in values if v <= b)
diff --git a/runtime/orchestrator/INDEX.md b/runtime/orchestrator/INDEX.md
index 309bca1..83eac47 100644
--- a/runtime/orchestrator/INDEX.md
+++ b/runtime/orchestrator/INDEX.md
@@ -5,8 +5,8 @@
| 文件 | 用途 |
|------|------|
| `flows.py` | Prefect `@flow` 主入口,接收 router DAG 跑全链路 |
-| `tasks.py` | `@task` 原子(调专家/Skill/49 脚本) |
-| `adapters/` | 包装 `05-代码示例/*.py` 49 脚本为 Prefect task |
+| `tasks.py` | `@task` 原子(调专家/Skill/67 脚本) |
+| `adapters/` | 包装 `utils/*.py` 67 脚本为 Prefect task |
## 编排能力
diff --git a/runtime/orchestrator/adapters/__init__.py b/runtime/orchestrator/adapters/__init__.py
index 3da1c67..dadc913 100644
--- a/runtime/orchestrator/adapters/__init__.py
+++ b/runtime/orchestrator/adapters/__init__.py
@@ -1,4 +1,4 @@
-"""Adapter layer: wrap 05-代码示例/*.py 49 scripts as Prefect tasks without modifying them.
+"""Adapter layer: wrap utils/*.py 67 scripts as Prefect tasks without modifying them.
Each adapter shells out via subprocess to isolate import paths and side effects.
"""
diff --git a/runtime/orchestrator/adapters/experts.py b/runtime/orchestrator/adapters/experts.py
index 38c6375..fe34857 100644
--- a/runtime/orchestrator/adapters/experts.py
+++ b/runtime/orchestrator/adapters/experts.py
@@ -24,7 +24,7 @@
from runtime.orchestrator.adapters.scripts import ScriptResult, list_available_scripts, run_script
# Canonical script mapping. Names without a script run as a no-op step (logged only).
-# Mapping derived from existing 05-代码示例 filenames; missing scripts degrade gracefully.
+# Mapping derived from existing utils filenames; missing scripts degrade gracefully.
EXPERT_SCRIPT_MAP: dict[str, str | None] = {
"test-lead": None,
"requirements-analyst": None,
@@ -51,12 +51,12 @@
}
# V1.14 防 mock 单源 (ROADMAP V1.15 Day 0 承诺):
-# 实装状态读 registry catalog (02-专家定义/03-技能定义 *.md frontmatter
+# 实装状态读 registry catalog (agents/skills *.md frontmatter
# EXPERT_IMPL_STATUS / SKILL_IMPL_STATUS),避免 hardcoded dict 与 .md 双源漂移。
#
# 合法值 (registry._VALID_IMPL_STATUS 同步):
# - production: 真 LLM-driven runner (orchestrator/agents/*.py) 已实装
-# - script: 真 script-backed (05-代码示例/*.py) 已实装
+# - script: 真 script-backed (utils/*.py) 已实装
# - rollout: V1.x rollout 待实装 → execute_node 拒绝路由,不输出 mock
# - vision: V2.x 方法论参考 → 同 rollout 处理
# - unknown: frontmatter 缺失/非法值 → 同 rollout 处理 (fail closed)
@@ -152,22 +152,26 @@ def _resolve_script(name: str, kind: str) -> str | None:
return None
+import threading as _threading # noqa: E402
+
_upstream_outputs: dict[str, dict] = {} # 流水线内每 expert 产物缓存,供下游 RunnerContext.upstream
_upstream_meta: dict[str, dict] = {} # 流水线内每 expert 元信息 (ok/degraded/error),供下游 RunnerContext.upstream_meta
# 防 mock 闭环: test-lead 看到任一 degraded → 决策降级
+_upstream_lock = _threading.Lock() # 防御性锁: 拓扑排序保证依赖顺序,锁仅防未来并行分支
def reset_upstream_cache() -> None:
"""每次新 run 开始前由 flow 调,清空上游产物缓存."""
- _upstream_outputs.clear()
- _upstream_meta.clear()
+ with _upstream_lock:
+ _upstream_outputs.clear()
+ _upstream_meta.clear()
def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: int = 1800) -> StepOutcome:
inputs = inputs or {}
# V1.14 防 mock (ROADMAP V1.15 Day 0 承诺): 拒绝路由未实装 expert/skill,不输出 mock 数据
- # 单源 = 02-专家定义/03-技能定义 .md frontmatter (registry catalog)
+ # 单源 = agents/skills .md frontmatter (registry catalog)
if kind in ("expert", "skill"):
status = _get_impl_status(name, kind)
if status in ("rollout", "vision"):
@@ -219,8 +223,9 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i
import time as _t
t0 = _t.time()
res = runner.run(ctx)
- _upstream_outputs[name] = res.output
- _upstream_meta[name] = {
+ with _upstream_lock:
+ _upstream_outputs[name] = res.output
+ _upstream_meta[name] = {
"ok": res.ok,
"degraded": res.degraded,
"error": res.error,
@@ -245,8 +250,8 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i
if kind == "skill":
try:
from runtime.config.settings import get_settings
- from runtime.orchestrator.skills import get_skill_runner
from runtime.orchestrator.agents.base import RunnerContext
+ from runtime.orchestrator.skills import get_skill_runner
runner = get_skill_runner(name)
if runner is not None:
@@ -263,8 +268,9 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i
import time as _t
t0 = _t.time()
res = runner.run(ctx)
- _upstream_outputs[name] = res.output
- _upstream_meta[name] = {
+ with _upstream_lock:
+ _upstream_outputs[name] = res.output
+ _upstream_meta[name] = {
"ok": res.ok,
"degraded": res.degraded,
"error": res.error,
@@ -304,7 +310,7 @@ def execute_node(name: str, kind: str, *, inputs: dict | None = None, timeout: i
executed_script=script,
returncode=127,
stdout="",
- stderr=f"script '{script}' not found under 05-代码示例/",
+ stderr=f"script '{script}' not found under utils/",
duration_ms=0,
)
defaults = SCRIPT_DEFAULT_ARGS.get(script, {})
diff --git a/runtime/orchestrator/adapters/perf_orchestrator.py b/runtime/orchestrator/adapters/perf_orchestrator.py
index 9c74137..573e347 100644
--- a/runtime/orchestrator/adapters/perf_orchestrator.py
+++ b/runtime/orchestrator/adapters/perf_orchestrator.py
@@ -11,14 +11,13 @@
from __future__ import annotations
import json
-import statistics
+import os
import subprocess
-import sys
import time
+from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any, Callable
@dataclass
@@ -88,6 +87,7 @@ def http_benchmark(self, fn: Callable[[], bool], concurrent: int,
errors = 0
def worker():
+ nonlocal errors
t0 = time.time()
try:
ok = fn()
@@ -106,7 +106,6 @@ def worker():
if len(futures) > concurrent * 2:
for f in as_completed(futures[:concurrent]):
if f.result():
- nonlocal success
success += 1
futures = futures[concurrent:]
diff --git a/runtime/orchestrator/adapters/script_bridge.py b/runtime/orchestrator/adapters/script_bridge.py
index dbf32f3..727e1f3 100644
--- a/runtime/orchestrator/adapters/script_bridge.py
+++ b/runtime/orchestrator/adapters/script_bridge.py
@@ -1,4 +1,4 @@
-"""Bridge standalone 05-代码示例 scripts into the orchestrator pipeline.
+"""Bridge standalone utils scripts into the orchestrator pipeline.
Each adapter wraps a standalone script with:
- Input normalization (DAG node inputs → CLI args / stdin JSON)
@@ -17,7 +17,7 @@
def _scripts_dir() -> Path:
- return Path(__file__).resolve().parents[3] / "05-代码示例"
+ return Path(__file__).resolve().parents[3] / "utils"
def _run_script(script_name: str, args: list[str] | None = None,
diff --git a/runtime/orchestrator/adapters/scripts.py b/runtime/orchestrator/adapters/scripts.py
index 6e2244f..2465112 100644
--- a/runtime/orchestrator/adapters/scripts.py
+++ b/runtime/orchestrator/adapters/scripts.py
@@ -1,4 +1,4 @@
-"""Adapter: wrap `05-代码示例/*.py` scripts as callable units.
+"""Adapter: wrap `utils/*.py` scripts as callable units.
Uses subprocess to isolate sys.path / globals from the runtime layer.
"""
@@ -30,7 +30,7 @@ def ok(self) -> bool:
def run_script(script_filename: str, args: list[str] | None = None, *, timeout: int = 1800) -> ScriptResult:
- """Run a script under 05-代码示例/ by filename.
+ """Run a script under utils/ by filename.
Args:
script_filename: e.g. "smoke_runner.py" (must live under scripts_dir).
diff --git a/runtime/orchestrator/agents/INDEX.md b/runtime/orchestrator/agents/INDEX.md
index e8016ad..6c2caf4 100644
--- a/runtime/orchestrator/agents/INDEX.md
+++ b/runtime/orchestrator/agents/INDEX.md
@@ -1,4 +1,4 @@
-# runtime/orchestrator/agents/ 索引(V1.32.5)
+# runtime/orchestrator/agents/ 索引(V1.36.0)
> 真 LLM-driven expert runner · 16 核心 expert 全落地 · 主宪章 §40 真 agent 落地 canon。
@@ -6,11 +6,11 @@
| Runner | 角色源 | 上游 | 产物 |
|--------|--------|------|------|
-| `requirements-analyst` | 02-专家定义/02-需求分析.md | PRD(artifact_text) | `requirements_summary.json` |
-| `automation-engineer` | 02-专家定义/06-自动化脚本.md | requirements-analyst | `automation_scripts_plan.json` |
-| `test-executor` | 02-专家定义/07-测试执行.md | automation-engineer | `execution_plan.json` |
-| `bug-manager` | 02-专家定义/08-Bug管理.md | test-executor | `bug_drafts.json`(BugTracker-ready) |
-| `test-lead` | 02-专家定义/01-测试主管.md | 全链路 | `final_verdict_*.json`(上线决策) |
+| `requirements-analyst` | agents/02-需求分析.md | PRD(artifact_text) | `requirements_summary.json` |
+| `automation-engineer` | agents/06-自动化脚本.md | requirements-analyst | `automation_scripts_plan.json` |
+| `test-executor` | agents/07-测试执行.md | automation-engineer | `execution_plan.json` |
+| `bug-manager` | agents/08-Bug管理.md | test-executor | `bug_drafts.json`(BugTracker-ready) |
+| `test-lead` | agents/01-测试主管.md | 全链路 | `final_verdict_*.json`(上线决策) |
## 0 未实现(V1.32 rollout 完成)
diff --git a/runtime/orchestrator/agents/__init__.py b/runtime/orchestrator/agents/__init__.py
index a48f030..b1927b2 100644
--- a/runtime/orchestrator/agents/__init__.py
+++ b/runtime/orchestrator/agents/__init__.py
@@ -1,6 +1,6 @@
-"""Real LLM-driven agent runners(V1.32.5 · 主宪章 §33 + §40).
+"""Real LLM-driven agent runners(V1.36.0 · 主宪章 §33 + §40).
-每个 runner 把 02-专家定义/*.md 的角色描述变成可执行的 LLM 调用:
+每个 runner 把 agents/*.md 的角色描述变成可执行的 LLM 调用:
- 读上游产物 → 拼 prompt → 调 LLM → 解析输出 → 落产物 → 给下游
11 核心 runner(V1.x rollout 收尾,所有 LLM-driven expert 已实装):
@@ -22,19 +22,23 @@
testcase-designer / data-preparer / report-generator / desktop-tester / ai-tester)。
"""
-from runtime.orchestrator.agents.base import AGENT_RUNNERS, AgentRunner, RunnerContext, get_runner # noqa: F401
-
# 触发注册(每个模块加载时 @register 注册到 AGENT_RUNNERS)
from runtime.orchestrator.agents import ( # noqa: F401,E402
- requirements_analyst,
automation_engineer,
- test_executor,
+ automotive_tester,
bug_manager,
- test_lead,
env_manager,
mobile_tester,
- visual_tester,
- system_tester,
pentest_tester,
- automotive_tester,
+ requirements_analyst,
+ system_tester,
+ test_executor,
+ test_lead,
+ visual_tester,
+)
+from runtime.orchestrator.agents.base import ( # noqa: F401
+ AGENT_RUNNERS,
+ AgentRunner,
+ RunnerContext,
+ get_runner,
)
diff --git a/runtime/orchestrator/agents/automation_engineer.py b/runtime/orchestrator/agents/automation_engineer.py
index 43100aa..d3d67aa 100644
--- a/runtime/orchestrator/agents/automation_engineer.py
+++ b/runtime/orchestrator/agents/automation_engineer.py
@@ -12,7 +12,7 @@
class AutomationEngineer(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 automation-engineer 专家(02-专家定义/06-自动化脚本.md)。\n"
+ "你是 Test-Agent 项目内 automation-engineer 专家(agents/06-自动化脚本.md)。\n"
"职责:把 testcase-designer 给的用例转为 pytest + Playwright(UI)/ requests(API)脚本骨架。\n"
"原则:\n"
"1) Page Object 模式(UI)/ 数据驱动(API)\n"
diff --git a/runtime/orchestrator/agents/automotive_tester.py b/runtime/orchestrator/agents/automotive_tester.py
index 8f52125..858f100 100644
--- a/runtime/orchestrator/agents/automotive_tester.py
+++ b/runtime/orchestrator/agents/automotive_tester.py
@@ -25,7 +25,7 @@
class AutomotiveTester(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 automotive-tester 专家(02-专家定义/16-车载测试.md)。\n"
+ "你是 Test-Agent 项目内 automotive-tester 专家(agents/16-车载测试.md)。\n"
"职责:基于 PRD + 车载上下文,生成 ASIL 评估 + 测试用例 + ADAS 场景 + OTA 计划 + 合规矩阵。\n"
"原则:\n"
"1) 识别子系统:ecu / adas / ivi / v2x / multi\n"
diff --git a/runtime/orchestrator/agents/base.py b/runtime/orchestrator/agents/base.py
index 7665b1d..2746863 100644
--- a/runtime/orchestrator/agents/base.py
+++ b/runtime/orchestrator/agents/base.py
@@ -49,7 +49,7 @@ class AgentRunner(abc.ABC):
@abc.abstractmethod
def system_prompt(self) -> str:
- """从 02-专家定义/*.md 提炼的角色 prompt."""
+ """从 agents/*.md 提炼的角色 prompt."""
@abc.abstractmethod
def user_prompt(self, ctx: RunnerContext) -> str:
@@ -139,7 +139,7 @@ def run(self, ctx: RunnerContext) -> RunnerResult:
def _parse_json(raw: str) -> dict[str, Any]:
raw = raw.strip()
if raw.startswith("```"):
- raw = raw.strip("`")
+ raw = raw[3:-3] if raw.endswith("```") else raw[3:]
if "\n" in raw:
_, raw = raw.split("\n", 1)
start = raw.find("{")
diff --git a/runtime/orchestrator/agents/bug_manager.py b/runtime/orchestrator/agents/bug_manager.py
index 120ed00..01b854a 100644
--- a/runtime/orchestrator/agents/bug_manager.py
+++ b/runtime/orchestrator/agents/bug_manager.py
@@ -12,7 +12,7 @@
class BugManager(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 bug-manager 专家(02-专家定义/08-Bug管理.md)。\n"
+ "你是 Test-Agent 项目内 bug-manager 专家(agents/08-Bug管理.md)。\n"
"职责:把 test-executor 的失败列表转 BugTracker-ready Bug(默认 zentao,可换 Jira/GitHub Issues 等,主宪章 §37)。\n"
"原则:\n"
"1) severity 权威映射:1=P0(阻塞)/ 2=P1(高)/ 3=P2(中)/ 4=P3(低)\n"
diff --git a/runtime/orchestrator/agents/env_manager.py b/runtime/orchestrator/agents/env_manager.py
index d3b2e67..b766843 100644
--- a/runtime/orchestrator/agents/env_manager.py
+++ b/runtime/orchestrator/agents/env_manager.py
@@ -18,7 +18,7 @@
class EnvManager(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 env-manager 专家(02-专家定义/04-环境管理.md)。\n"
+ "你是 Test-Agent 项目内 env-manager 专家(agents/04-环境管理.md)。\n"
"职责:基于 PRD 与上游需求摘要,生成测试环境检查清单 + 准备步骤。\n"
"原则:\n"
"1) 仅针对 test / staging 环境,prod 严禁\n"
diff --git a/runtime/orchestrator/agents/mobile_tester.py b/runtime/orchestrator/agents/mobile_tester.py
index 7260bfa..a5b9fd3 100644
--- a/runtime/orchestrator/agents/mobile_tester.py
+++ b/runtime/orchestrator/agents/mobile_tester.py
@@ -19,7 +19,7 @@
class MobileTester(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 mobile-tester 专家(02-专家定义/10-移动测试.md)。\n"
+ "你是 Test-Agent 项目内 mobile-tester 专家(agents/10-移动测试.md)。\n"
"职责:基于 PRD + 上游摘要,生成移动端测试用例 + ADB/Xcode 命令清单。\n"
"原则:\n"
"1) 识别目标平台:Android / iOS / 微信/支付宝/抖音 小程序 / 混合 H5\n"
diff --git a/runtime/orchestrator/agents/pentest_tester.py b/runtime/orchestrator/agents/pentest_tester.py
index 4744881..f9168f7 100644
--- a/runtime/orchestrator/agents/pentest_tester.py
+++ b/runtime/orchestrator/agents/pentest_tester.py
@@ -24,7 +24,7 @@
class PentestTester(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 pentest-tester 专家(02-专家定义/15-渗透测试.md)。\n"
+ "你是 Test-Agent 项目内 pentest-tester 专家(agents/15-渗透测试.md)。\n"
"职责:基于 PRD + 安全上下文,生成渗透测试计划 + 工具调用清单(仅计划,不执行)。\n"
"原则:\n"
"1) 识别测试模式:whitebox(有源码) / blackbox(仅 URL/IP) / graybox(部分 API doc)\n"
diff --git a/runtime/orchestrator/agents/requirements_analyst.py b/runtime/orchestrator/agents/requirements_analyst.py
index 8246838..d34c2b0 100644
--- a/runtime/orchestrator/agents/requirements_analyst.py
+++ b/runtime/orchestrator/agents/requirements_analyst.py
@@ -12,7 +12,7 @@
class RequirementsAnalyst(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 requirements-analyst 专家(02-专家定义/02-需求分析.md)。\n"
+ "你是 Test-Agent 项目内 requirements-analyst 专家(agents/02-需求分析.md)。\n"
"职责:把任意格式 PRD(md/pdf/docx/url/口头)解析为结构化测试需求摘要。\n"
"原则:\n"
"1) 识别核心功能 + 边界场景 + 高风险区\n"
diff --git a/runtime/orchestrator/agents/system_tester.py b/runtime/orchestrator/agents/system_tester.py
index 7febefc..53f7cc8 100644
--- a/runtime/orchestrator/agents/system_tester.py
+++ b/runtime/orchestrator/agents/system_tester.py
@@ -21,7 +21,7 @@
class SystemTester(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 system-tester 专家(02-专家定义/13-系统集成测试.md)。\n"
+ "你是 Test-Agent 项目内 system-tester 专家(agents/13-系统集成测试.md)。\n"
"职责:基于 PRD + 系统拓扑上下文,生成系统集成测试用例 + 设备命令清单 + 协议特定配置。\n"
"原则:\n"
"1) 识别系统目标类型:iot / audiovideo / tracing / mq / integration / multi\n"
diff --git a/runtime/orchestrator/agents/test_executor.py b/runtime/orchestrator/agents/test_executor.py
index 383525e..fde614e 100644
--- a/runtime/orchestrator/agents/test_executor.py
+++ b/runtime/orchestrator/agents/test_executor.py
@@ -12,7 +12,7 @@
class TestExecutor(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 test-executor 专家(02-专家定义/07-测试执行.md)。\n"
+ "你是 Test-Agent 项目内 test-executor 专家(agents/07-测试执行.md)。\n"
"职责:接 automation-engineer 的脚本规划 → 输出执行计划 + 失败分类策略 + Flaky 标记规则。\n"
"原则:\n"
"1) 四阶段执行:冒烟(P0) → 回归(P0+P1) → 全量 → 性能\n"
diff --git a/runtime/orchestrator/agents/test_lead.py b/runtime/orchestrator/agents/test_lead.py
index 21dab32..f621a5f 100644
--- a/runtime/orchestrator/agents/test_lead.py
+++ b/runtime/orchestrator/agents/test_lead.py
@@ -12,7 +12,7 @@
class TestLead(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 test-lead 专家(02-专家定义/01-测试主管.md)。\n"
+ "你是 Test-Agent 项目内 test-lead 专家(agents/01-测试主管.md)。\n"
"职责:看上游所有专家产物 → 出**上线决策**(go / no-go / conditional)。\n"
"原则:\n"
"1) 看 requirements / scripts / execution_plan / bug_drafts 完整链路\n"
@@ -105,7 +105,7 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]:
known_risks = ["此为 stub LLM 输出,非真测试数据"]
else:
verdict = "go"
- summary_zh = f"selftest mock 验证 · GO"
+ summary_zh = "selftest mock 验证 · GO"
rationale = (
"本次为 selftest fixture mock 运行 · 主流程编排链路全通 · "
"P0 Bug=0,自动判 go · 真生产环境请填真 PRD + 真 LLM 再判。"
@@ -126,7 +126,8 @@ def mock_output(self, ctx: RunnerContext) -> dict[str, Any]:
}
def output_file(self, ctx: RunnerContext) -> Path | None:
- return ctx.workspace / "执行日志" / "decisions" / f"final_verdict_{int(ctx.workspace.stat().st_mtime if ctx.workspace.exists() else 0)}.json"
+ import uuid
+ return ctx.workspace / "执行日志" / "decisions" / f"final_verdict_{uuid.uuid4().hex[:12]}.json"
def summary(self, output: dict[str, Any]) -> str:
return f"决策:{output.get('verdict', '?').upper()} · {output.get('summary_zh', '')[:60]}"
diff --git a/runtime/orchestrator/agents/visual_tester.py b/runtime/orchestrator/agents/visual_tester.py
index ecdf54c..72298e3 100644
--- a/runtime/orchestrator/agents/visual_tester.py
+++ b/runtime/orchestrator/agents/visual_tester.py
@@ -19,7 +19,7 @@
class VisualTester(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 visual-tester 专家(02-专家定义/12-视觉游戏测试.md)。\n"
+ "你是 Test-Agent 项目内 visual-tester 专家(agents/12-视觉游戏测试.md)。\n"
"职责:基于 PRD + UI 描述,生成视觉测试点 + 视觉对比脚本片段 + 容差配置。\n"
"原则:\n"
"1) 识别视觉目标类型:web-canvas / webgl / unity / unreal / mobile-game / ocr / visual-regression\n"
diff --git a/runtime/orchestrator/direct.py b/runtime/orchestrator/direct.py
index 60e7182..098610d 100644
--- a/runtime/orchestrator/direct.py
+++ b/runtime/orchestrator/direct.py
@@ -10,8 +10,6 @@
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Any
-from loguru import logger
-
from runtime.observability.logging import bind_run, configure_logging
from runtime.observability.otel import init_tracing, span
from runtime.orchestrator.adapters.experts import execute_node, reset_upstream_cache
@@ -19,6 +17,36 @@
from runtime.self_healing.retry import with_retry
+def _is_abort_exception(exc: Exception) -> bool:
+ """Check if exception signals an on_failure=abort (not a transient error)."""
+ return isinstance(exc, RuntimeError) and "aborted" in str(exc)
+
+
+def _run_node_with_retry(node: DAGNode, pool: ThreadPoolExecutor, results: dict, log) -> None:
+ """Execute a node with retries, respecting on_failure=abort."""
+ nid = node.id
+ try:
+ results[nid] = pool.submit(_run_node, node).result()
+ except Exception as exc:
+ log.warning("node {} attempt failed: {}", nid, exc)
+ if node.on_failure == "abort" or _is_abort_exception(exc):
+ results[nid] = {"id": nid, "ok": False, "error": str(exc), "aborted": True}
+ return
+ # retry up to 2 more times for transient errors
+ for attempt in range(2):
+ time.sleep(2 ** attempt)
+ try:
+ results[nid] = pool.submit(_run_node, node).result()
+ return
+ except Exception as retry_exc:
+ log.warning("node {} retry {}/2 failed", nid, attempt + 1)
+ if node.on_failure == "abort" or _is_abort_exception(retry_exc):
+ results[nid] = {"id": nid, "ok": False, "error": str(retry_exc), "aborted": True}
+ return
+ if attempt == 1:
+ results[nid] = {"id": nid, "ok": False, "error": str(retry_exc)}
+
+
def _run_node(node: DAGNode) -> dict[str, Any]:
from runtime.orchestrator.hooks import get_hook_registry
@@ -72,8 +100,9 @@ def run_decision_direct(decision_dict: dict[str, Any], run_id: str, max_workers:
pending = set(by_id.keys())
futures: dict[str, Future] = {}
circuit_broken = False
- pool = ThreadPoolExecutor(max_workers=max_workers)
+ pool = None
try:
+ pool = ThreadPoolExecutor(max_workers=max_workers)
with span("flow.run", run_id=run_id, nodes=len(ordered)):
while pending:
# circuit breaker: stop submitting new work
@@ -99,57 +128,47 @@ def run_decision_direct(decision_dict: dict[str, Any], run_id: str, max_workers:
done_now = [nid for nid, f in futures.items() if f.done() and nid in pending]
if not done_now:
# block on the oldest pending future
- next_id = next(iter(futures))
+ next_id = next(nid for nid in futures if nid in pending)
try:
results[next_id] = futures[next_id].result()
- except Exception as e: # noqa: BLE001
- log.warning("node {} attempt failed: {}", next_id, e)
- for attempt in range(2):
- time.sleep(2**attempt)
- try:
- fut = pool.submit(_run_node, by_id[next_id])
- results[next_id] = fut.result()
- break
- except Exception as retry_exc: # noqa: BLE001
- log.warning("node {} retry {}/2 failed", next_id, attempt + 1)
- if attempt == 1:
- results[next_id] = {"id": next_id, "ok": False, "error": str(retry_exc)}
- if results.get(next_id):
- if results[next_id].get("skipped"):
+ except Exception as exc:
+ log.warning("node {} attempt failed: {}", next_id, exc)
+ _run_node_with_retry(by_id[next_id], pool, results, log)
+ r = results.get(next_id)
+ if r:
+ if r.get("skipped"):
skipped.append(next_id)
- elif not results[next_id].get("ok"):
+ elif not r.get("ok"):
failures.append(next_id)
- if len(failures) >= MAX_FAILURES:
- log.error("circuit breaker: {} failures, aborting DAG", len(failures))
+ if r.get("aborted") or len(failures) >= MAX_FAILURES:
+ if r.get("aborted"):
+ log.error("node {} aborted, terminating DAG", next_id)
+ else:
+ log.error("circuit breaker: {} failures, aborting DAG", len(failures))
circuit_broken = True
pending.discard(next_id)
continue
for nid in done_now:
try:
results[nid] = futures[nid].result()
- except Exception as e: # noqa: BLE001
- log.warning("node {} attempt failed: {}", nid, e)
- for attempt in range(2):
- time.sleep(2**attempt)
- try:
- fut = pool.submit(_run_node, by_id[nid])
- results[nid] = fut.result()
- break
- except Exception as retry_exc: # noqa: BLE001
- log.warning("node {} retry {}/2 failed", nid, attempt + 1)
- if attempt == 1:
- results[nid] = {"id": nid, "ok": False, "error": str(retry_exc)}
- if results.get(nid):
- if results[nid].get("skipped"):
+ except Exception as exc:
+ results[nid] = {"id": nid, "ok": False, "error": str(exc), "aborted": _is_abort_exception(exc)}
+ r = results.get(nid)
+ if r:
+ if r.get("skipped"):
skipped.append(nid)
- elif not results[nid].get("ok"):
+ elif not r.get("ok"):
failures.append(nid)
- if len(failures) >= MAX_FAILURES:
- log.error("circuit breaker: {} failures, aborting DAG", len(failures))
+ if r.get("aborted") or len(failures) >= MAX_FAILURES:
+ if r.get("aborted"):
+ log.error("node {} aborted, terminating DAG", nid)
+ else:
+ log.error("circuit breaker: {} failures, aborting DAG", len(failures))
circuit_broken = True
pending.discard(nid)
finally:
- pool.shutdown(wait=True)
+ if pool is not None:
+ pool.shutdown(wait=True)
completed = len(results)
log.info("DAG progress: {}/{} nodes done, {} failed, {} skipped", completed, len(ordered), len(failures), len(skipped))
diff --git a/runtime/orchestrator/flows.py b/runtime/orchestrator/flows.py
index 53062a2..dc5b4fd 100644
--- a/runtime/orchestrator/flows.py
+++ b/runtime/orchestrator/flows.py
@@ -4,7 +4,6 @@
from typing import Any
-from loguru import logger
from prefect import flow
from prefect.task_runners import ConcurrentTaskRunner
@@ -54,6 +53,15 @@ def run_decision_flow(decision_dict: dict[str, Any], run_id: str) -> dict[str, A
log.error("circuit breaker: {} failures, aborting DAG", len(failures))
break
log.info("DAG progress: {}/{} nodes done", i, total)
+ # Cancel any remaining in-flight futures after circuit breaker or abort
+ cancelled = 0
+ for nid, fut in futures.items():
+ if nid not in results and not fut.state.is_final():
+ if hasattr(fut, "cancel"):
+ fut.cancel()
+ cancelled += 1
+ if cancelled:
+ log.warning("circuit breaker: cancelled {} in-flight task(s)", cancelled)
# L2-C: 识别 rollout 节点 + on_failure=skip 节点
rollout_skipped = [
diff --git a/runtime/orchestrator/hooks.py b/runtime/orchestrator/hooks.py
index 15c03d7..99bad88 100644
--- a/runtime/orchestrator/hooks.py
+++ b/runtime/orchestrator/hooks.py
@@ -5,10 +5,13 @@
from __future__ import annotations
+from collections.abc import Callable
from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List
+from typing import Any
-NodeHook = Callable[[str, Dict[str, Any]], None]
+from loguru import logger
+
+NodeHook = Callable[[str, dict[str, Any]], None]
"""Hook signature: (node_id, node_ctx) → None.
node_ctx keys: name, kind, inputs, timeout, results (after_node only), error (on_error only).
@@ -17,9 +20,9 @@
@dataclass
class HookRegistry:
- before: List[NodeHook] = field(default_factory=list)
- after: List[NodeHook] = field(default_factory=list)
- on_error: List[NodeHook] = field(default_factory=list)
+ before: list[NodeHook] = field(default_factory=list)
+ after: list[NodeHook] = field(default_factory=list)
+ on_error: list[NodeHook] = field(default_factory=list)
def register_before(self, fn: NodeHook) -> None:
self.before.append(fn)
@@ -30,26 +33,26 @@ def register_after(self, fn: NodeHook) -> None:
def register_error(self, fn: NodeHook) -> None:
self.on_error.append(fn)
- def fire_before(self, node_id: str, ctx: Dict[str, Any]) -> None:
+ def fire_before(self, node_id: str, ctx: dict[str, Any]) -> None:
for fn in self.before:
try:
fn(node_id, ctx)
except Exception:
- pass # hooks must not break execution
+ logger.opt(exception=True).debug("hook {}.{} failed for node {}", getattr(fn, '__module__', ''), getattr(fn, '__name__', repr(fn)), node_id)
- def fire_after(self, node_id: str, ctx: Dict[str, Any]) -> None:
+ def fire_after(self, node_id: str, ctx: dict[str, Any]) -> None:
for fn in self.after:
try:
fn(node_id, ctx)
except Exception:
- pass
+ logger.opt(exception=True).debug("hook {}.{} failed for node {}", getattr(fn, '__module__', ''), getattr(fn, '__name__', repr(fn)), node_id)
- def fire_error(self, node_id: str, ctx: Dict[str, Any]) -> None:
+ def fire_error(self, node_id: str, ctx: dict[str, Any]) -> None:
for fn in self.on_error:
try:
fn(node_id, ctx)
except Exception:
- pass
+ logger.opt(exception=True).debug("hook {}.{} failed for node {}", getattr(fn, '__module__', ''), getattr(fn, '__name__', repr(fn)), node_id)
# Global singleton — callers can replace per-run with a fresh instance.
diff --git a/runtime/orchestrator/release_readiness.py b/runtime/orchestrator/release_readiness.py
index e752a90..37ec8ee 100644
--- a/runtime/orchestrator/release_readiness.py
+++ b/runtime/orchestrator/release_readiness.py
@@ -7,6 +7,7 @@
from __future__ import annotations
from dataclasses import dataclass
+from pathlib import Path
from typing import Any
@@ -116,7 +117,7 @@ def _cli() -> None:
if args.from_summary:
import json as _json
- data = _json.loads(args.from_summary.read_text(encoding="utf-8"))
+ data = _json.loads(Path(args.from_summary).read_text(encoding="utf-8"))
result = score_from_run_summary(data)
else:
result = score_readiness(
diff --git a/runtime/orchestrator/skills/__init__.py b/runtime/orchestrator/skills/__init__.py
index 6e3a30d..7d1c9bc 100644
--- a/runtime/orchestrator/skills/__init__.py
+++ b/runtime/orchestrator/skills/__init__.py
@@ -1,4 +1,4 @@
-"""Real LLM-driven skill runners (V1.32.5 · ALL 14/14 rollout complete).
+"""Real LLM-driven skill runners (V1.36.0 · ALL 14/14 rollout complete).
16 production runners across 3 domains:
- General: mobile-test, visual-test, system-test, eval-harness
@@ -7,10 +7,10 @@
"""
from runtime.orchestrator.agents.base import ( # noqa: F401
+ SKILL_RUNNERS,
AgentRunner,
RunnerContext,
RunnerResult,
- SKILL_RUNNERS,
get_skill_runner,
register_skill,
)
diff --git a/runtime/orchestrator/skills/automotive_adas_scenario.py b/runtime/orchestrator/skills/automotive_adas_scenario.py
index a7964ec..f44cb90 100644
--- a/runtime/orchestrator/skills/automotive_adas_scenario.py
+++ b/runtime/orchestrator/skills/automotive_adas_scenario.py
@@ -1,9 +1,11 @@
"""automotive-adas-scenario · ADAS 场景库测试编排 (V1.31.0)."""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("automotive-adas-scenario")
class AutomotiveAdasScenario(AgentRunner):
def system_prompt(self) -> str: return "你是 automotive-adas-scenario skill。AEB/ACC/LKA/APA/AVP/NOA 场景库编排。ODD 边界 + SOTIF ISO 21448 合规。输出严格 JSON。"
diff --git a/runtime/orchestrator/skills/automotive_can_bus_test.py b/runtime/orchestrator/skills/automotive_can_bus_test.py
index 62c4dc8..ce62fb9 100644
--- a/runtime/orchestrator/skills/automotive_can_bus_test.py
+++ b/runtime/orchestrator/skills/automotive_can_bus_test.py
@@ -1,9 +1,11 @@
"""automotive-can-bus-test · CAN/CAN-FD/LIN/FlexRay/SOME-IP 协议测试编排 (V1.31.0)."""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("automotive-can-bus-test")
class AutomotiveCanBusTest(AgentRunner):
def system_prompt(self) -> str: return "你是 automotive-can-bus-test skill。CAN/CAN-FD/LIN/FlexRay/SOME-IP + DoIP/UDS 诊断编排。协议一致性 + DBC解析 + 时序 + 故障注入。输出严格 JSON。"
diff --git a/runtime/orchestrator/skills/automotive_hil_loop_test.py b/runtime/orchestrator/skills/automotive_hil_loop_test.py
index c394828..d54f210 100644
--- a/runtime/orchestrator/skills/automotive_hil_loop_test.py
+++ b/runtime/orchestrator/skills/automotive_hil_loop_test.py
@@ -1,9 +1,11 @@
"""automotive-hil-loop-test · HIL/SIL/MIL/PIL 环路编排 (V1.31.0)."""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("automotive-hil-loop-test")
class AutomotiveHilLoopTest(AgentRunner):
def system_prompt(self) -> str: return "你是 automotive-hil-loop-test skill。MIL/SIL/PIL/HIL 4 环编排。ASIL C/D 必经 HIL(真 ECU+I/O)。故障注入 + 极端工况。输出严格 JSON。"
diff --git a/runtime/orchestrator/skills/automotive_ota_update_test.py b/runtime/orchestrator/skills/automotive_ota_update_test.py
index cbe6ae7..26f07a0 100644
--- a/runtime/orchestrator/skills/automotive_ota_update_test.py
+++ b/runtime/orchestrator/skills/automotive_ota_update_test.py
@@ -1,9 +1,11 @@
"""automotive-ota-update-test · OTA 升级测试编排 (V1.31.0)."""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("automotive-ota-update-test")
class AutomotiveOtaUpdateTest(AgentRunner):
def system_prompt(self) -> str: return "你是 automotive-ota-update-test skill。7 项必测: 包签名 + 差分 + A/B分区 + 断电恢复 + 行车安全 + DTC + 回退。UN R156 + GB 44496-2024 合规。输出严格 JSON。"
diff --git a/runtime/orchestrator/skills/automotive_test.py b/runtime/orchestrator/skills/automotive_test.py
index 8c52d21..d6ee284 100644
--- a/runtime/orchestrator/skills/automotive_test.py
+++ b/runtime/orchestrator/skills/automotive_test.py
@@ -3,10 +3,12 @@
10 阶段: HARA+ASIL → 静态 MISRA → 单元 MC/DC → SIL/PIL → HIL → CAN → ADAS → OTA → 合规 → 报告
"""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("automotive-test")
class AutomotiveTest(AgentRunner):
def system_prompt(self) -> str: return "你是 automotive-test 主编排 skill。10 阶段整车测试: HARA → ASIL → MISRA → MC/DC → SIL/PIL → HIL → CAN → ADAS → OTA → 合规审计。ISO 26262 + SOTIF + UN R155/R156 合规驱动。输出严格 JSON。"
diff --git a/runtime/orchestrator/skills/eval_harness.py b/runtime/orchestrator/skills/eval_harness.py
index 749c949..194acdf 100644
--- a/runtime/orchestrator/skills/eval_harness.py
+++ b/runtime/orchestrator/skills/eval_harness.py
@@ -4,7 +4,7 @@
- LLM 读 PRD + 上游 ai-tester expert 产物 → 5 阶段评测计划
(评测配置 / pass@k / 稳定性 / 延迟 / 报告归档)
+ 质量门禁 + 安全护栏
-- 不实装 03-技能定义/eval-harness.md 全部职责 (eval_replay.py 真跑
+- 不实装 skills/eval-harness.md 全部职责 (eval_replay.py 真跑
/ PII scrub 执行 / LongMemEval benchmark 等留后续深化)
- 输出评测计划 JSON, 真执行在 runtime/tutor/eval_replay.py + ai_validator.py
"""
@@ -21,7 +21,7 @@
class EvalHarness(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 eval-harness skill(03-技能定义/eval-harness.md)。\n"
+ "你是 Test-Agent 项目内 eval-harness skill(skills/eval-harness.md)。\n"
"职责:基于 PRD + 上游 ai-tester expert 产物,编排 LLM/AI 系统评测 5 阶段计划。\n"
"原则:\n"
"1) 识别评测目标:prompt 版本回归 / RAG retrieval 质量 / agent 路由准确率 / 模型升级对比\n"
diff --git a/runtime/orchestrator/skills/mobile_test.py b/runtime/orchestrator/skills/mobile_test.py
index d69d3d4..402e06b 100644
--- a/runtime/orchestrator/skills/mobile_test.py
+++ b/runtime/orchestrator/skills/mobile_test.py
@@ -4,7 +4,7 @@
- LLM 读 PRD + 上游 mobile-tester expert 产物 → 6 阶段执行计划
(设备就绪 / Appium / 用例批次 / 性能采集 / Monkey / 报告归档)
+ 质量门禁 + 跨平台并行策略
-- 不实装 03-技能定义/mobile-test.md 全部职责 (Appium driver 真跑 / 云真机
+- 不实装 skills/mobile-test.md 全部职责 (Appium driver 真跑 / 云真机
/ 弱网注入 / 小程序开发者工具 CLI 等留后续深化)
- 输出执行计划 JSON, 真执行守护在 utils 层 (mobile_driver.py / miniprogram_runner)
"""
@@ -21,7 +21,7 @@
class MobileTest(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 mobile-test skill(03-技能定义/mobile-test.md)。\n"
+ "你是 Test-Agent 项目内 mobile-test skill(skills/mobile-test.md)。\n"
"职责:基于 PRD + 上游 mobile-tester expert 产物,编排移动端测试 6 阶段执行计划。\n"
"原则:\n"
"1) 识别目标平台:Android / iOS / 微信/支付宝/抖音 小程序 / 混合 H5\n"
diff --git a/runtime/orchestrator/skills/pentest_api.py b/runtime/orchestrator/skills/pentest_api.py
index 01cfc95..1e3d85f 100644
--- a/runtime/orchestrator/skills/pentest_api.py
+++ b/runtime/orchestrator/skills/pentest_api.py
@@ -6,10 +6,12 @@
"""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("pentest-api")
class PentestApi(AgentRunner):
def system_prompt(self) -> str:
diff --git a/runtime/orchestrator/skills/pentest_coordinator.py b/runtime/orchestrator/skills/pentest_coordinator.py
index 611fa45..a07fb3d 100644
--- a/runtime/orchestrator/skills/pentest_coordinator.py
+++ b/runtime/orchestrator/skills/pentest_coordinator.py
@@ -3,7 +3,7 @@
V1.21.0 minimum viable (ROADMAP skill rollout #1 落地, 解锁 rollout skill 流水线):
- LLM 读 PRD + tagent.yml 授权摘要 + 上游 pentest-tester expert 产物 → 5 阶段并发计划
(recon / vuln / exploit / post-exploit / report) + 子 skill 调用顺序 + 授权前置检查 evidence
-- 不实装 03-技能定义/pentest-coordinator.md 全部职责 (subagent pool 真起 / Allure 报告生成
+- 不实装 skills/pentest-coordinator.md 全部职责 (subagent pool 真起 / Allure 报告生成
/ decisions/ 真写入 等留后续深化)
- shannon 哲学 (仅 working PoC 入报告) + 主宪章 §22 决策不可逆禁止 + §24 safe-by-default
@@ -24,7 +24,7 @@
class PentestCoordinator(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 pentest-coordinator skill(03-技能定义/pentest-coordinator.md)。\n"
+ "你是 Test-Agent 项目内 pentest-coordinator skill(skills/pentest-coordinator.md)。\n"
"职责:基于 PRD + 授权上下文 + 上游 pentest-tester expert 产物,编排渗透测试 5 阶段流程。\n"
"原则:\n"
"1) 前置检查铁律 (主宪章 §24):tagent.yml pentest.authorized=true + pentest.scope=[list]\n"
diff --git a/runtime/orchestrator/skills/pentest_exploit.py b/runtime/orchestrator/skills/pentest_exploit.py
index 89cda3b..cf666ec 100644
--- a/runtime/orchestrator/skills/pentest_exploit.py
+++ b/runtime/orchestrator/skills/pentest_exploit.py
@@ -8,10 +8,12 @@
"""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("pentest-exploit")
class PentestExploit(AgentRunner):
def system_prompt(self) -> str:
diff --git a/runtime/orchestrator/skills/pentest_recon.py b/runtime/orchestrator/skills/pentest_recon.py
index c042768..92daca5 100644
--- a/runtime/orchestrator/skills/pentest_recon.py
+++ b/runtime/orchestrator/skills/pentest_recon.py
@@ -20,7 +20,7 @@
class PentestRecon(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 pentest-recon skill(03-技能定义/pentest-recon.md)。\n"
+ "你是 Test-Agent 项目内 pentest-recon skill(skills/pentest-recon.md)。\n"
"职责:基于 PRD + 授权上下文 + 上游 pentest-tester 产物,编排渗透侦察信息收集计划。\n"
"原则:\n"
"1) 授权前置铁律: tagent.yml pentest.recon_active=true 允许主动扫;否则仅 passive\n"
diff --git a/runtime/orchestrator/skills/pentest_report.py b/runtime/orchestrator/skills/pentest_report.py
index 646d6ac..1556aed 100644
--- a/runtime/orchestrator/skills/pentest_report.py
+++ b/runtime/orchestrator/skills/pentest_report.py
@@ -7,10 +7,12 @@
"""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("pentest-report")
class PentestReport(AgentRunner):
def system_prompt(self) -> str:
diff --git a/runtime/orchestrator/skills/pentest_vuln.py b/runtime/orchestrator/skills/pentest_vuln.py
index 7214759..c7f9e9d 100644
--- a/runtime/orchestrator/skills/pentest_vuln.py
+++ b/runtime/orchestrator/skills/pentest_vuln.py
@@ -21,7 +21,7 @@
class PentestVuln(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 pentest-vuln skill(03-技能定义/pentest-vuln.md)。\n"
+ "你是 Test-Agent 项目内 pentest-vuln skill(skills/pentest-vuln.md)。\n"
"职责:基于 PRD + 授权 + 上游侦察产物,编排 5 攻击域并发漏洞发现计划。\n"
"原则:\n"
"1) 授权铁律: tagent.yml pentest 段 must have authorized=true + scope list\n"
diff --git a/runtime/orchestrator/skills/pentest_web.py b/runtime/orchestrator/skills/pentest_web.py
index c4d24eb..89c90cf 100644
--- a/runtime/orchestrator/skills/pentest_web.py
+++ b/runtime/orchestrator/skills/pentest_web.py
@@ -5,10 +5,12 @@
"""
from __future__ import annotations
+
from pathlib import Path
-from typing import Any
+
from runtime.orchestrator.agents.base import AgentRunner, RunnerContext, register_skill
+
@register_skill("pentest-web")
class PentestWeb(AgentRunner):
def system_prompt(self) -> str:
diff --git a/runtime/orchestrator/skills/system_test.py b/runtime/orchestrator/skills/system_test.py
index 71ad360..fff31fd 100644
--- a/runtime/orchestrator/skills/system_test.py
+++ b/runtime/orchestrator/skills/system_test.py
@@ -4,7 +4,7 @@
- LLM 读 PRD + 上游 system-tester expert 产物 → 6 阶段执行计划
(环境检查 / IoT 测试 / 音视频校验 / 链路追踪 / 消息队列 / 报告归档)
+ 质量门禁 + 子场景路由策略
-- 不实装 03-技能定义/system-test.md 全部职责 (SSH 真跑 / 串口读写
+- 不实装 skills/system-test.md 全部职责 (SSH 真跑 / 串口读写
/ FFmpeg 解码 / Jaeger 查询 / Kafka consumer 等留后续深化)
- 输出执行计划 JSON, 真执行守护在 utils 层 (iot_helper / media_validator
/ tracing_validator / mq_helper)
@@ -22,7 +22,7 @@
class SystemTest(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 system-test skill(03-技能定义/system-test.md)。\n"
+ "你是 Test-Agent 项目内 system-test skill(skills/system-test.md)。\n"
"职责:基于 PRD + 上游 system-tester expert 产物,编排系统集成测试 6 阶段执行计划。\n"
"原则:\n"
"1) 识别子场景:iot / audiovideo / tracing / mq / multi (可复合)\n"
diff --git a/runtime/orchestrator/skills/visual_test.py b/runtime/orchestrator/skills/visual_test.py
index bc7ad5f..8c642c1 100644
--- a/runtime/orchestrator/skills/visual_test.py
+++ b/runtime/orchestrator/skills/visual_test.py
@@ -4,7 +4,7 @@
- LLM 读 PRD + 上游 visual-tester expert 产物 → 5 阶段执行计划
(环境检查 / 模板图准备 / 视觉冒烟 / 视觉回归 / 报告归档)
+ 质量门禁 + 多分辨率策略
-- 不实装 03-技能定义/visual-test.md 全部职责 (Airtest 真跑 / OCR 引擎
+- 不实装 skills/visual-test.md 全部职责 (Airtest 真跑 / OCR 引擎
/ SSIM 像素对比 / 多设备矩阵 等留后续深化)
- 输出执行计划 JSON, 真执行守护在 utils 层 (visual_helper.py)
"""
@@ -21,7 +21,7 @@
class VisualTest(AgentRunner):
def system_prompt(self) -> str:
return (
- "你是 Test-Agent 项目内 visual-test skill(03-技能定义/visual-test.md)。\n"
+ "你是 Test-Agent 项目内 visual-test skill(skills/visual-test.md)。\n"
"职责:基于 PRD + 上游 visual-tester expert 产物,编排视觉/游戏测试 5 阶段执行计划。\n"
"原则:\n"
"1) 识别目标类型:手游 / PC游戏 / 网页游戏 / Canvas/WebGL / 富图形界面 / 3D 工具\n"
diff --git a/runtime/orchestrator/tasks.py b/runtime/orchestrator/tasks.py
index fe290fc..b637cdd 100644
--- a/runtime/orchestrator/tasks.py
+++ b/runtime/orchestrator/tasks.py
@@ -6,8 +6,8 @@
from prefect import task
from prefect.tasks import exponential_backoff
-from runtime.orchestrator.adapters.experts import StepOutcome, execute_node
from runtime.observability.otel import span
+from runtime.orchestrator.adapters.experts import StepOutcome, execute_node
from runtime.router.schema import DAGNode
diff --git a/runtime/pyproject.toml b/runtime/pyproject.toml
index 15f7d07..03612b0 100644
--- a/runtime/pyproject.toml
+++ b/runtime/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "test-agent-runtime"
-version = "1.32.0"
+version = "1.42.0"
description = "Test-Agent runtime: AI router + Prefect orchestrator + FastAPI/CLI entry + flywheel storage"
requires-python = ">=3.10"
license = { text = "MIT" }
@@ -31,6 +31,7 @@ dependencies = [
"pypdf>=6.0.0",
"python-docx>=1.1.0",
"rich>=13.9.0",
+ "defusedxml>=0.7.1",
]
[project.optional-dependencies]
@@ -61,6 +62,8 @@ target-version = "py310"
select = ["E", "F", "I", "B", "UP", "SIM"]
ignore = ["E501"]
+[tool.ruff.lint.per-file-ignores]
+
[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
diff --git a/runtime/registry/INDEX.md b/runtime/registry/INDEX.md
index f2f5c33..190472f 100644
--- a/runtime/registry/INDEX.md
+++ b/runtime/registry/INDEX.md
@@ -4,7 +4,7 @@
| 文件 | 用途 |
|------|------|
-| `registry.py` | 扫 02-专家定义/*.md + 03-技能定义/*.md frontmatter,生成内存目录 |
+| `registry.py` | 扫 agents/*.md + skills/*.md frontmatter,生成内存目录 |
| `catalog.json` | 启动时生成,可手动 dump 给 LLM 用 |
## frontmatter 约定(已有)
diff --git a/runtime/registry/registry.py b/runtime/registry/registry.py
index 63f4fa1..86845bb 100644
--- a/runtime/registry/registry.py
+++ b/runtime/registry/registry.py
@@ -1,6 +1,6 @@
"""Expert + Skill registry.
-Scans `02-专家定义/*.md` and `03-技能定义/*.md`, parses YAML frontmatter,
+Scans `agents/*.md` and `skills/*.md`, parses YAML frontmatter,
exposes a unified catalog for router/orchestrator/api.
Frontmatter contract (already present in existing files):
@@ -29,7 +29,7 @@
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n(.*)$", re.DOTALL)
-# 合法 impl_status (与 02-专家定义/*.md / 03-技能定义/*.md frontmatter 严同步)
+# 合法 impl_status (与 agents/*.md / skills/*.md frontmatter 严同步)
_VALID_IMPL_STATUS = {"production", "script", "rollout", "vision"}
@@ -151,7 +151,7 @@ def build_catalog() -> Catalog:
def dump_catalog(target: Path | None = None) -> Path:
- s = get_settings()
+ get_settings()
target = target or (Path(__file__).parent / "catalog.json")
cat = build_catalog()
target.write_text(
diff --git a/runtime/router/INDEX.md b/runtime/router/INDEX.md
index 03f9491..032b92f 100644
--- a/runtime/router/INDEX.md
+++ b/runtime/router/INDEX.md
@@ -5,8 +5,8 @@
| 文件 | 用途 |
|------|------|
| `llm_client.py` | LiteLLM 多厂商封装 + Ollama 兜底 |
-| `expert_loader.py` | 扫描 `02-专家定义/*.md` frontmatter |
-| `skill_loader.py` | 扫描 `03-技能定义/*.md` frontmatter |
+| `expert_loader.py` | 扫描 `agents/*.md` frontmatter |
+| `skill_loader.py` | 扫描 `skills/*.md` frontmatter |
| `prompt.py` | 路由 system prompt(指导 LLM 选专家+Skill) |
| `schema.py` | DAG/Decision Pydantic 模型 |
| `router.py` | 主路由:被测物 → Decision(experts/skills/order/confidence/rationale) |
diff --git a/runtime/router/__init__.py b/runtime/router/__init__.py
index 5cdbc69..0d1be44 100644
--- a/runtime/router/__init__.py
+++ b/runtime/router/__init__.py
@@ -1,5 +1,5 @@
"""AI router: input artifact -> expert+skill DAG.
-Reads frontmatter of 02-专家定义/*.md and 03-技能定义/*.md via registry,
+Reads frontmatter of agents/*.md and skills/*.md via registry,
asks LLM (LiteLLM multi-provider + Ollama fallback) to produce a DAG.
"""
diff --git a/runtime/router/llm_client.py b/runtime/router/llm_client.py
index 9d5c9c5..0bb8873 100644
--- a/runtime/router/llm_client.py
+++ b/runtime/router/llm_client.py
@@ -91,7 +91,8 @@ def _call(self, provider: str, system: str, user: str, temperature: float, *, ma
def _extract_json(raw: str) -> dict[str, Any]:
raw = raw.strip()
if raw.startswith("```"):
- raw = raw.strip("`")
+ # Drop only the fence markers (one opening, one optional closing). Do NOT
+ # strip whitespace here: the newline right after the opening ``` / ```json
+ # is what the lang-tag split below keys on. Stripping it makes that split
+ # discard the first line of a multi-line, untagged JSON payload.
+ raw = raw[3:-3] if raw.endswith("```") else raw[3:]
# strip leading lang tag e.g. ```json
if "\n" in raw:
_, raw = raw.split("\n", 1)
diff --git a/runtime/router/router.py b/runtime/router/router.py
index da72cf4..6dd51bc 100644
--- a/runtime/router/router.py
+++ b/runtime/router/router.py
@@ -27,7 +27,7 @@ def _validate_against_catalog(decision: RoutingDecision, catalog: Catalog) -> li
issues: list[str] = []
# V1.14 防 mock (ROADMAP V1.15 Day 0 承诺): 检查 expert / skill 实装状态
- # 单源: catalog entry.impl_status (02-专家定义/03-技能定义 .md frontmatter)
+ # 单源: catalog entry.impl_status (agents/skills .md frontmatter)
# rollout / vision / unknown 状态 router 仍可路由,但 issues 列表标 warning + downgrade confidence
# → orchestrator execute_node 跑到时会硬拒并报明确错误 (returncode=2),不输出 mock 数据
for n in decision.dag:
diff --git a/runtime/scheduler/carbon_scheduler.py b/runtime/scheduler/carbon_scheduler.py
index 35f0e7a..9c6753b 100644
--- a/runtime/scheduler/carbon_scheduler.py
+++ b/runtime/scheduler/carbon_scheduler.py
@@ -17,7 +17,7 @@
import json
import os
import time
-from dataclasses import dataclass, field
+from dataclasses import dataclass
from pathlib import Path
from typing import Any
diff --git a/runtime/scheduler/scheduler.py b/runtime/scheduler/scheduler.py
index 47fd6cd..1aa2de9 100644
--- a/runtime/scheduler/scheduler.py
+++ b/runtime/scheduler/scheduler.py
@@ -7,12 +7,11 @@
from __future__ import annotations
-import os
+import contextlib
import threading
-import time
+from collections.abc import Callable
from datetime import datetime, timezone
from pathlib import Path
-from typing import Callable
from loguru import logger
@@ -55,15 +54,11 @@ def _release_lock(f) -> None:
if _LOCK_BACKEND == "fcntl":
fcntl.flock(f.fileno(), fcntl.LOCK_UN)
elif _LOCK_BACKEND == "msvcrt":
- try:
+ with contextlib.suppress(OSError):
msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1)
- except OSError:
- pass
finally:
- try:
+ with contextlib.suppress(OSError):
f.close()
- except OSError:
- pass
def run_job(job: dict, *, runner: Callable[[str], dict] | None = None) -> dict:
diff --git a/runtime/security/supply_chain.py b/runtime/security/supply_chain.py
index 54691c6..daaedc5 100644
--- a/runtime/security/supply_chain.py
+++ b/runtime/security/supply_chain.py
@@ -15,7 +15,6 @@
import uuid
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any
@dataclass
@@ -84,7 +83,7 @@ def generate_sbom(output_path: str = "workspace/sbom.cdx.json") -> SbomReport:
"metadata": {"timestamp": report.timestamp,
"component": {"name": "test-dependencies", "type": "library"}},
"components": [{"type": "library", "name": p.name, "version": p.version,
- "purl": p.purl, "licenses": [{"license": {"name": l}} for l in p.licenses],
+ "purl": p.purl, "licenses": [{"license": {"name": lic}} for lic in p.licenses],
"hashes": [{"alg": k.upper(), "content": v} for k, v in p.hashes.items()]}
for p in report.packages if p.name != "unknown"],
}
diff --git a/runtime/self_healing/__init__.py b/runtime/self_healing/__init__.py
index e957b1c..2ee45cc 100644
--- a/runtime/self_healing/__init__.py
+++ b/runtime/self_healing/__init__.py
@@ -1,6 +1,6 @@
"""Self-healing: auto-retry + locator fallback + LLM output repair."""
-from runtime.self_healing.retry import with_retry
from runtime.self_healing.locator_store import LocatorStore
+from runtime.self_healing.retry import with_retry
__all__ = ["with_retry", "LocatorStore"]
diff --git a/runtime/storage/db.py b/runtime/storage/db.py
index b509a87..41bf88e 100644
--- a/runtime/storage/db.py
+++ b/runtime/storage/db.py
@@ -2,8 +2,8 @@
from __future__ import annotations
+from collections.abc import Iterator
from contextlib import contextmanager
-from typing import Iterator
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
diff --git a/runtime/storage/models.py b/runtime/storage/models.py
index e6578f5..5b0665f 100644
--- a/runtime/storage/models.py
+++ b/runtime/storage/models.py
@@ -37,8 +37,8 @@ class Run(Base):
finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
artifact_keys: Mapped[list[str] | None] = mapped_column(JSON, nullable=True)
- cases: Mapped[list["Case"]] = relationship(back_populates="run", cascade="all, delete-orphan")
- evidence: Mapped[list["Evidence"]] = relationship(back_populates="run", cascade="all, delete-orphan")
+ cases: Mapped[list[Case]] = relationship(back_populates="run", cascade="all, delete-orphan")
+ evidence: Mapped[list[Evidence]] = relationship(back_populates="run", cascade="all, delete-orphan")
class CaseResult(str, enum.Enum):
@@ -63,7 +63,7 @@ class Case(Base):
error: Mapped[str | None] = mapped_column(Text, nullable=True)
run: Mapped[Run] = relationship(back_populates="cases")
- defects: Mapped[list["Defect"]] = relationship(back_populates="case", cascade="all, delete-orphan")
+ defects: Mapped[list[Defect]] = relationship(back_populates="case", cascade="all, delete-orphan")
class DefectSeverity(str, enum.Enum):
diff --git a/runtime/subagent/pool.py b/runtime/subagent/pool.py
index 7a25692..daa581a 100644
--- a/runtime/subagent/pool.py
+++ b/runtime/subagent/pool.py
@@ -3,13 +3,14 @@
from __future__ import annotations
import concurrent.futures
+import os
import threading
from loguru import logger
_executor: concurrent.futures.ThreadPoolExecutor | None = None
_lock = threading.Lock()
-_DEFAULT_WORKERS = 32
+# Sized like the stdlib ThreadPoolExecutor default, min(32, cpu_count + 4):
+# scales with the host, and the +4 keeps at least 5 workers so a 1-2 core
+# CI runner does not end up with a near-serial pool.
+# NOTE(review): assumes pooled subagent work is mostly I/O-bound — confirm.
+_DEFAULT_WORKERS = min(32, (os.cpu_count() or 1) + 4)
def get_pool() -> concurrent.futures.ThreadPoolExecutor:
diff --git a/runtime/subagent/spawn.py b/runtime/subagent/spawn.py
index 8971c9a..74fe423 100644
--- a/runtime/subagent/spawn.py
+++ b/runtime/subagent/spawn.py
@@ -3,8 +3,9 @@
from __future__ import annotations
import concurrent.futures
+from collections.abc import Callable
from dataclasses import dataclass
-from typing import Any, Callable
+from typing import Any
from loguru import logger
diff --git a/runtime/tests/conftest.py b/runtime/tests/conftest.py
index c8269c3..df5355c 100644
--- a/runtime/tests/conftest.py
+++ b/runtime/tests/conftest.py
@@ -3,10 +3,24 @@
from __future__ import annotations
import os
+import sys
from pathlib import Path
import pytest
+# Inject utils/ and all subdirectories into sys.path
+# V1.42.0: utils/ reorganized from flat into 12 functional subdirectories
+_PROJECT_ROOT = Path(__file__).resolve().parents[2]
+if str(_PROJECT_ROOT) not in sys.path:
+ sys.path.insert(0, str(_PROJECT_ROOT))
+
+_UTILS_DIR = _PROJECT_ROOT / "utils"
+if _UTILS_DIR.is_dir() and str(_UTILS_DIR) not in sys.path:
+ sys.path.insert(0, str(_UTILS_DIR))
+ for _sub in _UTILS_DIR.iterdir():
+ if _sub.is_dir() and not _sub.name.startswith(("_", ".")) and str(_sub) not in sys.path:
+ sys.path.insert(0, str(_sub))
+
@pytest.fixture(autouse=True)
def _env_isolation(tmp_path: Path, monkeypatch):
diff --git a/runtime/tests/test_cli_commands.py b/runtime/tests/test_cli_commands.py
index 9fb14ea..e8d234a 100644
--- a/runtime/tests/test_cli_commands.py
+++ b/runtime/tests/test_cli_commands.py
@@ -10,8 +10,8 @@
EXPECTED_COMMANDS = [
"catalog", "demo", "doctor", "export", "init",
- "search", "list", "install", "uninstall", "verify",
- "run", "plan", "selftest",
+ "install", "uninstall", "verify",
+ "run", "selftest",
]
@@ -26,6 +26,7 @@ def test_all_commands_registered():
def test_version_flag():
"""--version prints version and exits 0."""
import re
+
from runtime import __version__
result = runner.invoke(app, ["--version"])
assert result.exit_code == 0
@@ -51,8 +52,7 @@ def test_doctor_command():
def test_help_per_command():
"""Each command has its own --help."""
- for cmd in ["run", "catalog", "doctor", "selftest", "demo", "init", "export",
- "search", "list", "install", "uninstall", "verify", "plan"]:
+ for cmd in EXPECTED_COMMANDS:
result = runner.invoke(app, [cmd, "--help"])
assert result.exit_code == 0, f"{cmd} --help failed"
assert result.stdout.strip(), f"{cmd} --help produced no output"
diff --git a/runtime/tests/test_cli_config.py b/runtime/tests/test_cli_config.py
index a429eaf..a9887bd 100644
--- a/runtime/tests/test_cli_config.py
+++ b/runtime/tests/test_cli_config.py
@@ -73,7 +73,7 @@ def test_list_shows_six_builtins_and_compat_examples():
assert name in result.stdout
assert "zhipu" in result.stdout
assert "doubao" in result.stdout
- assert "04-配置文件/llm-providers.md" in result.stdout
+ assert "config/llm-providers.md" in result.stdout
def test_show_missing_env_hints_creation(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
diff --git a/runtime/tests/test_impl_status_filter.py b/runtime/tests/test_impl_status_filter.py
index ccb740a..4f91f84 100644
--- a/runtime/tests/test_impl_status_filter.py
+++ b/runtime/tests/test_impl_status_filter.py
@@ -4,7 +4,7 @@
router 路由仍可生成 DAG 但 _validate_against_catalog 标 issue + 降 confidence,
orchestrator execute_node 跑到时 returncode=2 + stderr "未实装",绝不输出 mock 数据。
-单源:02-专家定义/03-技能定义 *.md frontmatter EXPERT_IMPL_STATUS / SKILL_IMPL_STATUS。
+单源:agents/skills *.md frontmatter EXPERT_IMPL_STATUS / SKILL_IMPL_STATUS。
"""
from __future__ import annotations
@@ -36,7 +36,7 @@ def test_registry_expert_status_counts():
def test_registry_skill_status_counts():
- """Skill 32 = 23 production + 7 script + 0 rollout + 2 vision (V1.32.5 全 skill rollout 完成)。"""
+ """Skill 32 = 23 production + 7 script + 0 rollout + 2 vision (V1.36.0 全 skill rollout 完成)。"""
cat = get_catalog()
counts = Counter(e.impl_status for e in cat.skills.values())
assert counts.get("production", 0) == 23, f"skill production 应 23,实 {counts.get('production')}"
@@ -72,7 +72,7 @@ def test_router_flags_rollout_expert():
def test_router_does_not_falsely_flag_production_skill():
- """V1.32.5 全 rollout 完成 — production skill 不应被 flag 为 rollout/vision。"""
+ """V1.36.0 全 rollout 完成 — production skill 不应被 flag 为 rollout/vision。"""
cat = get_catalog()
dec = _mk_decision(("n1", "skill", "visual-test"))
issues = router._validate_against_catalog(dec, cat)
@@ -122,7 +122,7 @@ def test_execute_node_rejects_rollout_expert():
def test_execute_node_allows_production_skill():
- """V1.32.5 全 rollout 完成 — production skill 应正常执行 (rc=0),不被硬拒。"""
+ """V1.36.0 全 rollout 完成 — production skill 应正常执行 (rc=0),不被硬拒。"""
r = execute_node("automotive-can-bus-test", "skill")
assert r.returncode == 0, f"production skill 被误拒: rc={r.returncode} stderr={r.stderr}"
assert r.stdout, "production skill 应产出结果"
diff --git a/runtime/tests/test_portability.py b/runtime/tests/test_portability.py
index c8dc978..89faf76 100644
--- a/runtime/tests/test_portability.py
+++ b/runtime/tests/test_portability.py
@@ -2,9 +2,9 @@
from __future__ import annotations
-import sys
-import subprocess
import os
+import subprocess
+import sys
from pathlib import Path
import pytest
@@ -52,8 +52,9 @@ def test_no_aggressive_env_overwrite(self):
class TestReplaceability:
def test_standard_interfaces(self):
"""Core functions use standard Python interfaces (no custom protocols)."""
- from runtime.orchestrator.adapters.experts import execute_node
import inspect
+
+ from runtime.orchestrator.adapters.experts import execute_node
sig = inspect.signature(execute_node)
params = list(sig.parameters.keys())
assert "name" in params
diff --git a/runtime/tests/test_registry.py b/runtime/tests/test_registry.py
index a5d2b45..cf0af6b 100644
--- a/runtime/tests/test_registry.py
+++ b/runtime/tests/test_registry.py
@@ -6,11 +6,10 @@
from runtime.registry.registry import build_catalog
-
# 动态扫源目录而非写死数字 — 项目持续增长 agent/skill,基线会过时
_PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[2]
-_EXPERTS_DIR = _PROJECT_ROOT / "02-专家定义"
-_SKILLS_DIR = _PROJECT_ROOT / "03-技能定义"
+_EXPERTS_DIR = _PROJECT_ROOT / "agents"
+_SKILLS_DIR = _PROJECT_ROOT / "skills"
def test_catalog_loads_existing_assets():
@@ -23,11 +22,11 @@ def test_catalog_loads_existing_assets():
assert len(cat.experts) >= src_experts, (
f"experts loaded={len(cat.experts)}, source agents={src_experts} "
- f"— registry 漏扫,检查 02-专家定义/ 下的 [0-9]*.md 文件"
+ f"— registry 漏扫,检查 agents/ 下的 [0-9]*.md 文件"
)
assert len(cat.skills) >= src_skills, (
f"skills loaded={len(cat.skills)}, source skills>={src_skills} "
- f"— registry 漏扫,检查 03-技能定义/ 下的 *.md 文件"
+ f"— registry 漏扫,检查 skills/ 下的 *.md 文件"
)
assert "test-lead" in cat.experts, "test-lead expert missing"
diff --git a/runtime/tests/test_router.py b/runtime/tests/test_router.py
index 84c99db..d1e2daa 100644
--- a/runtime/tests/test_router.py
+++ b/runtime/tests/test_router.py
@@ -49,7 +49,7 @@ def test_router_starts_with_requirements_analyst():
def test_router_ends_with_test_lead_decision():
- """DAG 末节点 = test-lead 决策(主宪章 §40 + 02-专家定义/README.md 流程
+ """DAG 末节点 = test-lead 决策(主宪章 §40 + agents/README.md 流程
"bug-manager → report-generator → test-lead 决策")。report-generator 倒数第二。"""
art = TargetArtifact(kind="text", text="generic web system")
decision = route(art, client=LLMClient(provider="stub", fallback="stub"))
diff --git a/runtime/tests/test_router_real.py b/runtime/tests/test_router_real.py
index a3aaa5c..bd1020b 100644
--- a/runtime/tests/test_router_real.py
+++ b/runtime/tests/test_router_real.py
@@ -20,7 +20,6 @@
import json
import os
import random
-import sys
import time
from pathlib import Path
diff --git a/runtime/tests/test_utils_absentee.py b/runtime/tests/test_utils_absentee.py
new file mode 100644
index 0000000..b2e08c7
--- /dev/null
+++ b/runtime/tests/test_utils_absentee.py
@@ -0,0 +1,183 @@
+# SPDX-License-Identifier: MIT
+"""Unit tests for absentee_scenario_injector.py — Phase 3.3 缺席者场景注入."""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+_utils_dir = Path(__file__).resolve().parents[2] / "utils"
+if str(_utils_dir) not in sys.path:
+ sys.path.insert(0, str(_utils_dir))
+
+
+# ═══════════════════════════════════════════════════════════════
+# Group listing tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestListGroups:
+ def test_all_9_groups_present(self):
+ from absentee_scenario_injector import list_groups
+ groups = list_groups()
+ assert len(groups) == 9
+
+ def test_each_group_has_label(self):
+ from absentee_scenario_injector import list_groups
+ for g in list_groups():
+ assert g["id"]
+ assert g["label"]
+ assert g["scenario_count"] > 0
+
+
+# ═══════════════════════════════════════════════════════════════
+# Scenario query tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestQueryScenarios:
+ def test_query_all_returns_all(self):
+ from absentee_scenario_injector import SCENARIOS, query_scenarios
+ assert len(query_scenarios()) == len(SCENARIOS)
+
+ def test_query_by_group(self):
+ from absentee_scenario_injector import query_scenarios
+ results = query_scenarios(groups=["visual_impairment"])
+ assert len(results) >= 3
+ assert all(s.group == "visual_impairment" for s in results)
+
+ def test_query_by_severity(self):
+ from absentee_scenario_injector import query_scenarios
+ results = query_scenarios(severity="P0")
+ assert len(results) > 0
+ assert all(s.severity == "P0" for s in results)
+
+ def test_query_by_tags(self):
+ from absentee_scenario_injector import query_scenarios
+ results = query_scenarios(tags=["screen-reader"])
+ assert len(results) >= 1
+ assert any("screen-reader" in s.tags for s in results)
+
+ def test_query_combined(self):
+ from absentee_scenario_injector import query_scenarios
+ results = query_scenarios(groups=["visual_impairment"], severity="P0")
+ assert all(s.group == "visual_impairment" and s.severity == "P0" for s in results)
+
+ def test_query_empty_group(self):
+ from absentee_scenario_injector import query_scenarios
+ results = query_scenarios(groups=["nonexistent_group"])
+ assert len(results) == 0
+
+
+# ═══════════════════════════════════════════════════════════════
+# Scenario injection tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestInjectScenarios:
+ def test_inject_all(self):
+ from absentee_scenario_injector import SCENARIOS, inject_scenarios
+ results = inject_scenarios()
+ # Default min_severity=P2 includes all
+ assert len(results) == len(SCENARIOS)
+
+ def test_inject_p0_only(self):
+ from absentee_scenario_injector import inject_scenarios
+ results = inject_scenarios(min_severity="P0")
+ assert all(s["severity"] == "P0" for s in results)
+
+ def test_inject_with_count_limit(self):
+ from absentee_scenario_injector import inject_scenarios
+ results = inject_scenarios(count=5)
+ assert len(results) == 5
+
+ def test_inject_specific_group(self):
+ from absentee_scenario_injector import inject_scenarios
+ results = inject_scenarios(groups=["mental_crisis"])
+ assert len(results) >= 3
+ assert all(s["group"] == "mental_crisis" for s in results)
+
+ def test_injected_has_required_fields(self):
+ from absentee_scenario_injector import inject_scenarios
+ results = inject_scenarios(count=1)
+ s = results[0]
+ for field in ["id", "group", "severity", "title", "description", "test_steps", "expected"]:
+ assert field in s, f"Missing field: {field}"
+ assert isinstance(s["test_steps"], list)
+ assert len(s["test_steps"]) > 0
+
+
+# ═══════════════════════════════════════════════════════════════
+# Charter generation tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestGenerateCharter:
+ def test_generates_markdown(self):
+ from absentee_scenario_injector import generate_charter, query_scenarios
+ scenarios = query_scenarios(groups=["visual_impairment"], severity="P0")
+ charter = generate_charter(scenarios[0], module="login", duration_min=45)
+ assert "# Charter:" in charter
+ assert "login" in charter
+ assert "视觉障碍" in charter
+ assert "## 测试步骤" in charter
+ assert "## 预期结果" in charter
+
+ def test_batch_generates_files(self, tmp_path):
+ from absentee_scenario_injector import generate_batch_charters
+ paths = generate_batch_charters(
+ groups=["mental_crisis"], severity="P0",
+ output_dir=str(tmp_path),
+ )
+ assert len(paths) >= 3
+ for p in paths:
+ assert Path(p).exists()
+ content = Path(p).read_text(encoding="utf-8")
+ assert "mental_crisis" in content.lower() or "MC-" in content
+
+
+# ═══════════════════════════════════════════════════════════════
+# Coverage report tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestCoverageReport:
+ def test_full_coverage(self):
+ from absentee_scenario_injector import coverage_report, inject_scenarios
+ scenarios = inject_scenarios()
+ report = coverage_report(scenarios)
+ assert report["total_absentee_groups"] == 9
+ assert report["coverage_pct"] == 100.0
+ assert len(report["groups_missing"]) == 0
+
+ def test_partial_coverage(self):
+ from absentee_scenario_injector import coverage_report, inject_scenarios
+ scenarios = inject_scenarios(groups=["visual_impairment", "elderly"])
+ report = coverage_report(scenarios)
+ assert report["groups_covered"] == 2
+ assert report["coverage_pct"] < 100.0
+ assert len(report["groups_missing"]) == 7
+
+ def test_empty_coverage(self):
+ from absentee_scenario_injector import coverage_report
+ report = coverage_report([])
+ assert report["groups_covered"] == 0
+ assert report["coverage_pct"] == 0.0
+
+
+# ═══════════════════════════════════════════════════════════════
+# Export tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestExport:
+ def test_export_json(self, tmp_path):
+ from absentee_scenario_injector import export_injection_plan, inject_scenarios
+ scenarios = inject_scenarios(groups=["elderly"])
+ path = export_injection_plan(scenarios, output_dir=str(tmp_path))
+ assert Path(path).exists()
+ data = json.loads(Path(path).read_text(encoding="utf-8"))
+ assert data["total_scenarios"] > 0
+ assert "coverage" in data
+
+ def test_ci_summary(self):
+ from absentee_scenario_injector import ci_summary, inject_scenarios
+ scenarios = inject_scenarios(groups=["visual_impairment", "mental_crisis"])
+ text = ci_summary(scenarios)
+ assert "visual_impairment" in text or "视觉" in text
+ assert "mental_crisis" in text or "精神" in text
diff --git a/runtime/tests/test_utils_bug_tracker.py b/runtime/tests/test_utils_bug_tracker.py
new file mode 100644
index 0000000..6ea1261
--- /dev/null
+++ b/runtime/tests/test_utils_bug_tracker.py
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: MIT
+"""Unit tests for bug_tracker_base.py ABC and factory."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pytest
+
+_utils_dir = Path(__file__).resolve().parents[2] / "utils"
+if str(_utils_dir) not in sys.path:
+ sys.path.insert(0, str(_utils_dir))
+
+
+class TestBugTrackerBase:
+ def test_cannot_instantiate_abstract(self):
+ from bug_tracker_base import BugTrackerBase
+ with pytest.raises(TypeError):
+ BugTrackerBase() # type: ignore[abstract]
+
+ def test_concrete_subclass_instantiable(self):
+ from bug_tracker_base import BugTrackerBase
+
+ class FakeTracker(BugTrackerBase):
+ def submit_bug(self, title, description, severity, attachments=None, reproduce_steps=""):
+ return "BUG-1"
+
+ def get_status(self, bug_id):
+ return {"status": "open", "assignee": "", "severity": 3, "last_updated": ""}
+
+ def add_comment(self, bug_id, comment, attachments=None):
+ pass
+
+ def link_testcase(self, bug_id, testcase_id):
+ pass
+
+ def query_open_bugs(self, filters=None):
+ return []
+
+ tracker = FakeTracker()
+ assert tracker.submit_bug("test", "desc", 1) == "BUG-1"
+ assert tracker.get_status("BUG-1")["status"] == "open"
+
+ def test_missing_method_fails(self):
+ from bug_tracker_base import BugTrackerBase
+
+ class IncompleteTracker(BugTrackerBase):
+ def submit_bug(self, title, description, severity, attachments=None, reproduce_steps=""):
+ return ""
+
+ with pytest.raises(TypeError):
+ IncompleteTracker() # type: ignore[abstract]
+
+
+class TestTrackerRegistry:
+ def test_zentao_registered(self):
+ from bug_tracker_base import TRACKER_REGISTRY
+ assert "zentao" in TRACKER_REGISTRY
+
+ def test_jira_registered(self):
+ from bug_tracker_base import TRACKER_REGISTRY
+ assert "jira" in TRACKER_REGISTRY
+
+ def test_github_registered(self):
+ from bug_tracker_base import TRACKER_REGISTRY
+ assert "github" in TRACKER_REGISTRY
+
+ def test_linear_registered(self):
+ from bug_tracker_base import TRACKER_REGISTRY
+ assert "linear" in TRACKER_REGISTRY
+
+ def test_webhook_registered(self):
+ from bug_tracker_base import TRACKER_REGISTRY
+ assert "webhook" in TRACKER_REGISTRY
+
+ def test_all_registry_values_are_basetracker_subclasses(self):
+ from bug_tracker_base import TRACKER_REGISTRY, BugTrackerBase
+ for name, cls in TRACKER_REGISTRY.items():
+ if name == "zentao":
+ # Legacy: ZentaoBugManager not yet migrated to BugTrackerBase ABC
+ continue
+ assert issubclass(cls, BugTrackerBase), f"{name}: {cls} not a BugTrackerBase subclass"
+
+
+class TestCreateBugManager:
+ def test_returns_none_for_unknown_tracker(self, monkeypatch):
+ monkeypatch.delenv("BUG_TRACKER", raising=False)
+ from bug_tracker_base import create_bug_manager
+ assert create_bug_manager("nonexistent-tracker") is None
+
+ def test_returns_instance_for_webhook(self, monkeypatch):
+ monkeypatch.setenv("WEBHOOK_BUG_URL", "https://example.com/webhook")
+ from bug_tracker_base import create_bug_manager
+ mgr = create_bug_manager("webhook")
+ assert mgr is not None
+ assert type(mgr).__name__ == "WebhookBugManager"
diff --git a/runtime/tests/test_utils_evidence_chain.py b/runtime/tests/test_utils_evidence_chain.py
new file mode 100644
index 0000000..5c40cb8
--- /dev/null
+++ b/runtime/tests/test_utils_evidence_chain.py
@@ -0,0 +1,408 @@
+# SPDX-License-Identifier: MIT
+"""Tests for evidence_chain.py - evidentiary chain admissibility."""
+import json
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "utils"))
+from evidence_chain import ( # noqa: E402
+ ChainOfCustody,
+ EvidenceItem,
+ EvidencePackage,
+ build_evidence_chain,
+ ci_summary,
+ collect_baselines,
+ collect_decisions,
+ collect_dora_metrics,
+ collect_test_history,
+ collect_tracing_validation,
+ compliance_matrix,
+ export_chain_of_custody_report,
+ export_package,
+ hash_content,
+ quick_package,
+ verify_chain_integrity,
+)
+
+# ── Fixtures ──
+
+@pytest.fixture
+def tmp_decisions_dir():
+ with tempfile.TemporaryDirectory() as d:
+ p = Path(d)
+ (p / "d1.json").write_text(json.dumps({
+ "ts": "20260519T120000Z",
+ "verdict": "go",
+ "rationale": "All checks passed.",
+ "metrics": {"pass_rate": 1.0},
+ }))
+ (p / "d2.json").write_text(json.dumps({
+ "ts": "20260519T130000Z",
+ "verdict": "conditional",
+ "rationale": "上游 degraded",
+ "known_risks": ["risk A"],
+ }))
+ (p / "bad.json").write_text("not json")
+ yield p
+
+
+@pytest.fixture
+def sample_item():
+ return EvidenceItem(
+ id="ev-1",
+ source="decisions",
+ category="decision_log",
+ timestamp="2026-05-19T12:00:00Z",
+ content={"key": "value", "count": 42},
+ )
+
+
+@pytest.fixture
+def sample_chain(sample_item):
+ c = ChainOfCustody(chain_id="test-chain", created_at="2026-05-19T12:00:00Z")
+ c.add(sample_item)
+ return c
+
+
+@pytest.fixture
+def sample_package(sample_chain):
+ pkg = EvidencePackage(
+ package_id="EP-20260519-0001",
+ chain=sample_chain,
+ metadata={"generator": "test"},
+ )
+ pkg.seal()
+ return pkg
+
+
+@pytest.fixture
+def sample_deployments():
+ return [
+ {"timestamp": "2026-05-19T10:00:00Z", "env": "prod", "success": True},
+ {"timestamp": "2026-05-19T11:00:00Z", "env": "prod", "success": True},
+ {"timestamp": "2026-05-19T12:00:00Z", "env": "prod", "success": False},
+ ]
+
+
+@pytest.fixture
+def sample_incidents():
+ return [
+ {"started": "2026-05-19T09:00:00Z", "resolved": "2026-05-19T09:30:00Z", "severity": "P1"},
+ {"started": "2026-05-19T14:00:00Z", "resolved": "2026-05-19T14:45:00Z", "severity": "P0"},
+ ]
+
+
+# ── Test hash_content ──
+
+class TestHashContent:
+ def test_deterministic(self):
+ a = hash_content({"b": 2, "a": 1})
+ b = hash_content({"a": 1, "b": 2})
+ assert a == b
+
+ def test_different_content(self):
+ a = hash_content({"x": 1})
+ b = hash_content({"x": 2})
+ assert a != b
+
+ def test_hex_format(self):
+ h = hash_content({"test": True})
+ assert len(h) == 64
+ assert all(c in "0123456789abcdef" for c in h)
+
+
+# ── Test EvidenceItem ──
+
+class TestEvidenceItem:
+ def test_auto_hash(self, sample_item):
+ assert len(sample_item.content_hash) == 64
+ assert sample_item.previous_hash is None
+
+ def test_explicit_hash(self):
+ item = EvidenceItem(
+ id="e1", source="test", category="cat",
+ timestamp="2026-01-01T00:00:00Z",
+ content={"x": 1}, content_hash="abc123",
+ )
+ assert item.content_hash == "abc123"
+
+ def test_different_id_different_hash(self, sample_item):
+ item2 = EvidenceItem(
+ id="ev-2", source="decisions", category="decision_log",
+ timestamp="2026-05-19T12:00:00Z", content={"key": "value", "count": 42},
+ )
+ assert sample_item.content_hash == item2.content_hash # same content
+
+
+# ── Test ChainOfCustody ──
+
+class TestChainOfCustody:
+ def test_empty_chain_root_hash(self):
+ c = ChainOfCustody(chain_id="empty")
+ assert len(c.root_hash()) == 64
+
+ def test_add_links_previous_hash(self, sample_item):
+ c = ChainOfCustody(chain_id="test")
+ item2 = EvidenceItem(
+ id="ev-2", source="dora", category="metrics",
+ timestamp="2026-05-19T13:00:00Z", content={"mttr": 1.5},
+ )
+ c.add(sample_item)
+ c.add(item2)
+ assert item2.previous_hash == sample_item.content_hash
+ assert len(c.items) == 2
+
+ def test_root_hash_changes_after_add(self, sample_chain, sample_item):
+ h1 = sample_chain.root_hash()
+ item2 = EvidenceItem(
+ id="ev-2", source="test", category="test",
+ timestamp="now", content={"new": True},
+ )
+ sample_chain.add(item2)
+ assert sample_chain.root_hash() != h1
+
+
+# ── Test EvidencePackage ──
+
+class TestEvidencePackage:
+ def test_seal_sets_proof(self, sample_package):
+ assert len(sample_package.integrity_proof) == 64
+ assert sample_package.exported_at != ""
+
+ def test_reproducible_seal(self, sample_chain):
+ pkg1 = EvidencePackage(package_id="P1", chain=sample_chain)
+ pkg2 = EvidencePackage(package_id="P1", chain=sample_chain)
+ pkg1.seal()
+ pkg2.seal()
+ assert pkg1.integrity_proof == pkg2.integrity_proof
+
+
+# ── Test collectors ──
+
+class TestCollectDecisions:
+ def test_collects_all_valid(self, tmp_decisions_dir):
+ items = collect_decisions(tmp_decisions_dir)
+ assert len(items) == 2
+
+ def test_empty_dir(self):
+ with tempfile.TemporaryDirectory() as d:
+ assert collect_decisions(Path(d)) == []
+
+ def test_missing_dir(self):
+ assert collect_decisions(Path("/nonexistent/path")) == []
+
+ def test_content_fields(self, tmp_decisions_dir):
+ items = collect_decisions(tmp_decisions_dir)
+ assert items[0]["verdict"] == "go"
+ assert items[1]["verdict"] == "conditional"
+
+
+class TestCollectDoraMetrics:
+ def test_returns_summary(self, sample_deployments, sample_incidents):
+ result = collect_dora_metrics(sample_deployments, sample_incidents)
+ assert "deployment_frequency" in result
+ assert "mttr" in result
+
+ def test_empty_deployments(self):
+ result = collect_dora_metrics([], [])
+ assert result["deployment_frequency"]["deployments"] == 0
+
+
+class TestCollectTracingValidation:
+ def test_all_pass(self):
+ results = [
+ {"pass": True, "services_found": ["svc-a", "svc-b"]},
+ {"pass": True, "services_found": ["svc-a"]},
+ ]
+ r = collect_tracing_validation(results)
+ assert r["pass_rate"] == 1.0
+
+ def test_mixed(self):
+ results = [{"pass": True, "services_found": ["x"]}, {"pass": False, "services_found": []}]
+ r = collect_tracing_validation(results)
+ assert r["pass_rate"] == 0.5
+
+ def test_empty(self):
+ r = collect_tracing_validation([])
+ assert r["traces_checked"] == 0
+
+
+class TestCollectBaselines:
+ def test_missing_file(self):
+ r = collect_baselines(Path("/nonexistent/baseline.json"))
+ assert r["available"] is False
+
+ def test_existing_file(self):
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+ json.dump({"tps": 100, "p95": 200}, f)
+ path = Path(f.name)
+ try:
+ r = collect_baselines(path)
+ assert r["available"] is True
+ assert r["tps"] == 100
+ finally:
+ path.unlink()
+
+
+class TestCollectTestHistory:
+ def test_empty_dir(self):
+ with tempfile.TemporaryDirectory() as d:
+ assert collect_test_history(Path(d)) == []
+
+ def test_missing_dir(self):
+ assert collect_test_history(Path("/nonexistent")) == []
+
+ def test_collects_xml(self):
+ with tempfile.TemporaryDirectory() as d:
+ p = Path(d)
+ (p / "result1.xml").write_text("")
+ (p / "result2.xml").write_text("")
+ items = collect_test_history(p)
+ assert len(items) == 2
+
+
+# ── Test build_evidence_chain ──
+
+class TestBuildEvidenceChain:
+ def test_builds_from_decisions(self, tmp_decisions_dir):
+ pkg = build_evidence_chain(decisions_dir=tmp_decisions_dir)
+ assert len(pkg.chain.items) >= 1
+ assert pkg.integrity_proof != ""
+
+ def test_builds_from_all_sources(self, tmp_decisions_dir, sample_deployments, sample_incidents):
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+ json.dump({"tps": 50}, f)
+ bp = Path(f.name)
+ try:
+ pkg = build_evidence_chain(
+ decisions_dir=tmp_decisions_dir,
+ dora_deployments=sample_deployments,
+ dora_incidents=sample_incidents,
+ trace_results=[{"pass": True, "services_found": ["api"]}],
+ baseline_path=bp,
+ )
+ sources = {item.source for item in pkg.chain.items}
+ assert "decisions" in sources
+ assert "dora_metrics" in sources
+ assert "tracing_validator" in sources
+ finally:
+ bp.unlink()
+
+ def test_builds_with_nothing(self):
+ pkg = build_evidence_chain(
+ decisions_dir=Path("/nonexistent"),
+ history_dir=Path("/nonexistent"),
+ )
+ assert len(pkg.chain.items) == 0
+ assert len(pkg.integrity_proof) == 64
+
+
+# ── Test verify_chain_integrity ──
+
+class TestVerifyChainIntegrity:
+ def test_valid_package_passes(self, sample_package):
+ result = verify_chain_integrity(sample_package)
+ assert result["pass"] is True
+ assert result["tampered"] == []
+
+ def test_tampered_content_fails(self, sample_package):
+ sample_package.chain.items[0].content["key"] = "tampered"
+ result = verify_chain_integrity(sample_package)
+ assert result["pass"] is False
+
+ def test_broken_chain_link_fails(self, sample_package, sample_item):
+ item2 = EvidenceItem(
+ id="ev-2", source="test", category="test",
+ timestamp="now", content={"x": 1},
+ )
+ item2.previous_hash = "0000000000000000000000000000000000000000000000000000000000000000"
+ sample_package.chain.items.append(item2)
+ sample_package.seal()
+ result = verify_chain_integrity(sample_package)
+ assert result["pass"] is False
+
+ def test_wrong_integrity_proof_fails(self, sample_package):
+ sample_package.integrity_proof = "bad"
+ result = verify_chain_integrity(sample_package)
+ assert result["pass"] is False
+
+
+# ── Test exports ──
+
+class TestExportPackage:
+ def test_exports_valid_json(self, sample_package):
+ with tempfile.TemporaryDirectory() as d:
+ out = Path(d) / "test_evidence.json"
+ path = export_package(sample_package, out)
+ data = json.loads(Path(path).read_text())
+ assert data["package_id"] == sample_package.package_id
+ assert data["chain"]["item_count"] == 1
+
+ def test_auto_path(self, sample_package, monkeypatch):
+ old_cwd = Path.cwd()
+ with tempfile.TemporaryDirectory() as d:
+ monkeypatch.chdir(d)
+ try:
+ path_str = export_package(sample_package)
+ path = Path(path_str)
+ assert path.exists()
+ data = json.loads(path.read_text())
+ assert data["package_id"] == sample_package.package_id
+ finally:
+ monkeypatch.chdir(str(old_cwd))
+
+
+class TestExportChainOfCustodyReport:
+ def test_creates_markdown(self, sample_package):
+ with tempfile.TemporaryDirectory() as d:
+ out = Path(d) / "custody.md"
+ path = export_chain_of_custody_report(sample_package, out)
+ content = Path(path).read_text()
+ assert "# Chain of Custody Report" in content
+ assert sample_package.package_id in content
+
+
+# ── Test compliance ──
+
+class TestComplianceMatrix:
+ def test_returns_all_standards(self):
+ m = compliance_matrix()
+ assert "ISO_27001" in m
+ assert "SOC2" in m
+ assert "NIST_800_53" in m
+ assert "GDPR" in m
+
+
+# ── Test ci_summary ──
+
+class TestCiSummary:
+ def test_returns_key_fields(self, sample_package):
+ s = ci_summary(sample_package)
+ assert s["items"] == 1
+ assert "decisions" in s["sources"]
+ assert s["integrity_verified"] is True
+ assert len(s["root_hash"]) == 16
+
+ def test_with_multiple_items(self, sample_package, sample_item):
+ item2 = EvidenceItem(
+ id="ev-2", source="dora_metrics", category="metrics",
+ timestamp="now", content={"mttr": 2.0},
+ )
+ sample_package.chain.add(item2)
+ sample_package.seal()
+ s = ci_summary(sample_package)
+ assert s["items"] == 2
+ assert s["dora_available"] is True
+
+
+# ── Test quick_package ──
+
+class TestQuickPackage:
+ def test_returns_package(self):
+ pkg = quick_package()
+ assert isinstance(pkg, EvidencePackage)
+ assert pkg.package_id.startswith("EP-")
+ assert len(pkg.integrity_proof) == 64
diff --git a/runtime/tests/test_utils_fairness.py b/runtime/tests/test_utils_fairness.py
new file mode 100644
index 0000000..33726dc
--- /dev/null
+++ b/runtime/tests/test_utils_fairness.py
@@ -0,0 +1,286 @@
+# SPDX-License-Identifier: MIT
+"""Unit tests for fairness_auditor.py — Phase 3.1 伦理/偏见审计."""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+_utils_dir = Path(__file__).resolve().parents[2] / "utils"
+if str(_utils_dir) not in sys.path:
+ sys.path.insert(0, str(_utils_dir))
+
+
+# ═══════════════════════════════════════════════════════════════
+# Fixtures
+# ═══════════════════════════════════════════════════════════════
+
+@pytest.fixture
+def balanced_dataset():
+ """Two groups, exactly equal representation, exactly equal label rates."""
+ # Group 0: 50 positive, 50 negative
+ y0 = np.array([1] * 50 + [0] * 50, dtype=float)
+ # Group 1: 50 positive, 50 negative (same distribution)
+ y1 = np.array([1] * 50 + [0] * 50, dtype=float)
+ y_true = np.concatenate([y0, y1])
+ sensitive = np.array([0] * 100 + [1] * 100)
+ return y_true, sensitive
+
+
+@pytest.fixture
+def biased_dataset():
+ """Group 0 overrepresented, group 0 has higher positive rate."""
+ rng = np.random.RandomState(42)
+ n_a, n_b = 160, 40 # 80/20 split
+ y_a = rng.choice([0, 1], n_a, p=[0.3, 0.7]) # 70% positive
+ y_b = rng.choice([0, 1], n_b, p=[0.7, 0.3]) # 30% positive
+ y_true = np.concatenate([y_a, y_b]).astype(float)
+ sensitive = np.array([0] * n_a + [1] * n_b)
+ return y_true, sensitive
+
+
+@pytest.fixture
+def fair_predictions():
+ """Predictions that are perfectly fair across groups — exact same positive rate."""
+ # Group 0: 50 positive, 50 negative
+ y0 = np.array([1] * 50 + [0] * 50, dtype=float)
+ # Group 1: 50 positive, 50 negative (same distribution)
+ y1 = np.array([1] * 50 + [0] * 50, dtype=float)
+ y_true = np.concatenate([y0, y1])
+ y_pred = y_true.copy() # perfect predictions
+ sensitive = np.array([0] * 100 + [1] * 100)
+ return y_true, y_pred, sensitive
+
+
+@pytest.fixture
+def biased_predictions():
+ """Predictions biased against group 1."""
+ rng = np.random.RandomState(42)
+ n_a, n_b = 100, 100
+ # Group 0: perfect prediction
+ yt_a = rng.randint(0, 2, n_a).astype(float)
+ yp_a = yt_a.copy()
+ # Group 1: 30% false negative rate
+ yt_b = rng.randint(0, 2, n_b).astype(float)
+ yp_b = yt_b.copy()
+ fn_mask = (yt_b == 1) & (rng.random(n_b) < 0.3)
+ yp_b[fn_mask] = 0
+ y_true = np.concatenate([yt_a, yt_b]).astype(float)
+ y_pred = np.concatenate([yp_a, yp_b]).astype(float)
+ sensitive = np.array([0] * n_a + [1] * n_b)
+ return y_true, y_pred, sensitive
+
+
+# ═══════════════════════════════════════════════════════════════
+# Dataset bias tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditDatasetBias:
+ def test_balanced_dataset_passes(self, balanced_dataset):
+ from fairness_auditor import audit_dataset_bias
+ y_true, sensitive = balanced_dataset
+ report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"])
+ assert report.overall_severity == "pass"
+ assert report.source == "dataset"
+
+ def test_biased_dataset_detects_representation_gap(self, biased_dataset):
+ from fairness_auditor import audit_dataset_bias
+ y_true, sensitive = biased_dataset
+ report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"],
+ representation_threshold=0.15)
+ assert report.overall_severity in ("warning", "fail")
+ repr_result = next(r for r in report.fairness_results
+ if r.metric == "representation_parity")
+ assert not repr_result.passed
+
+ def test_biased_dataset_detects_label_imbalance(self, biased_dataset):
+ from fairness_auditor import audit_dataset_bias
+ y_true, sensitive = biased_dataset
+ report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"])
+ label_result = next(r for r in report.fairness_results
+ if r.metric == "label_balance")
+ assert not label_result.passed
+
+ def test_recommendations_generated_for_biased(self, biased_dataset):
+ from fairness_auditor import audit_dataset_bias
+ y_true, sensitive = biased_dataset
+ report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"])
+ assert len(report.recommendations) > 0
+
+ def test_mismatched_group_names_raises(self, balanced_dataset):
+ from fairness_auditor import audit_dataset_bias
+ y_true, sensitive = balanced_dataset
+ with pytest.raises(ValueError):
+ audit_dataset_bias(y_true, sensitive, group_names=["only_one"])
+
+ def test_repr_custom_threshold(self, biased_dataset):
+ from fairness_auditor import audit_dataset_bias
+ y_true, sensitive = biased_dataset
+ # Very permissive threshold → should pass
+ report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"],
+ representation_threshold=0.5)
+ repr_result = next(r for r in report.fairness_results
+ if r.metric == "representation_parity")
+ assert repr_result.passed
+
+
+# ═══════════════════════════════════════════════════════════════
+# Model fairness tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditModelFairness:
+ def test_perfect_predictions_pass_all_metrics(self, fair_predictions):
+ from fairness_auditor import audit_model_fairness
+ y_true, y_pred, sensitive = fair_predictions
+ report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"])
+ assert report.overall_severity == "pass"
+ assert all(r.passed for r in report.fairness_results)
+
+ def test_biased_predictions_detected(self, biased_predictions):
+ from fairness_auditor import audit_model_fairness
+ y_true, y_pred, sensitive = biased_predictions
+ report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"])
+ # At least equal_opportunity should fail (TPR gap)
+ assert report.overall_severity in ("warning", "fail")
+
+ def test_disparate_impact_computed(self, fair_predictions):
+ from fairness_auditor import audit_model_fairness
+ y_true, y_pred, sensitive = fair_predictions
+ report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"])
+ di = next(r for r in report.fairness_results if r.metric == "disparate_impact")
+ assert di.value > 0.0
+ assert di.value <= 1.0
+
+ def test_group_metrics_populated(self, fair_predictions):
+ from fairness_auditor import audit_model_fairness
+ y_true, y_pred, sensitive = fair_predictions
+ report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["X", "Y"])
+ assert len(report.groups) == 2
+ for g in report.groups:
+ assert g.count > 0
+ assert g.tpr is not None
+ assert g.fpr is not None
+
+ def test_all_6_metrics_present(self, biased_predictions):
+ from fairness_auditor import audit_model_fairness
+ y_true, y_pred, sensitive = biased_predictions
+ report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"])
+ metric_names = {r.metric for r in report.fairness_results}
+ expected = {"disparate_impact", "statistical_parity_difference",
+ "equal_opportunity", "equalized_odds",
+ "calibration_parity", "predictive_parity"}
+ assert expected.issubset(metric_names)
+
+
+# ═══════════════════════════════════════════════════════════════
+# Intersectional fairness tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditIntersectional:
+ @pytest.fixture
+ def intersectional_data(self):
+ rng = np.random.RandomState(42)
+ n = 200
+ y_true = rng.randint(0, 2, n).astype(float)
+ # Gender: half 0, half 1
+ gender = np.array([0] * 100 + [1] * 100)
+ # Race: 0 for first 60 + last 50, 1 for middle 90
+ race = np.array([0] * 60 + [1] * 40 + [0] * 50 + [1] * 50)
+ y_pred = y_true.copy()
+ # Bias: gender=1 & race=1 get worse predictions
+ mask = (gender == 1) & (race == 1)
+ y_pred[mask] = rng.choice([0, 1], mask.sum(), p=[0.4, 0.6])
+ return y_true, y_pred, {"gender": gender, "race": race}
+
+ def test_intersectional_groups_created(self, intersectional_data):
+ from fairness_auditor import audit_intersectional
+ y_true, y_pred, sensitive = intersectional_data
+ report = audit_intersectional(y_true, y_pred, sensitive, min_group_size=5)
+ assert len(report.groups) >= 2
+
+ def test_intersectional_metrics_present(self, intersectional_data):
+ from fairness_auditor import audit_intersectional
+ y_true, y_pred, sensitive = intersectional_data
+ report = audit_intersectional(y_true, y_pred, sensitive, min_group_size=5)
+ metric_names = {r.metric for r in report.fairness_results}
+ assert "intersectional_disparate_impact" in metric_names
+ assert "intersectional_accuracy_gap" in metric_names
+
+ def test_small_groups_filtered(self, intersectional_data):
+ from fairness_auditor import audit_intersectional
+ y_true, y_pred, sensitive = intersectional_data
+ # With high min_group_size, all groups should be filtered
+ report = audit_intersectional(y_true, y_pred, sensitive, min_group_size=1000)
+ assert report.overall_severity == "pass"
+ assert len(report.groups) == 0
+
+
+# ═══════════════════════════════════════════════════════════════
+# Decision fairness tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditDecisionFairness:
+ def test_fair_decisions_pass(self):
+ from fairness_auditor import audit_decision_fairness
+ rng = np.random.RandomState(42)
+ decisions = rng.choice([0, 1], 200, p=[0.5, 0.5]).astype(float)
+ sensitive = np.array([0] * 100 + [1] * 100)
+ report = audit_decision_fairness(decisions, sensitive, group_names=["A", "B"])
+ # With random decisions and equal groups, should be close to fair
+ assert report.overall_severity in ("pass", "warning")
+
+ def test_biased_decisions_detected(self):
+ from fairness_auditor import audit_decision_fairness
+ rng = np.random.RandomState(42)
+ # Group 0: 80% approved, Group 1: 20% approved
+ d0 = rng.choice([0, 1], 100, p=[0.2, 0.8]).astype(float)
+ d1 = rng.choice([0, 1], 100, p=[0.8, 0.2]).astype(float)
+ decisions = np.concatenate([d0, d1])
+ sensitive = np.array([0] * 100 + [1] * 100)
+ report = audit_decision_fairness(decisions, sensitive, group_names=["A", "B"])
+ assert report.overall_severity == "fail"
+
+ def test_decision_groups_match(self):
+ from fairness_auditor import audit_decision_fairness
+ decisions = np.array([1, 1, 0, 0, 1, 0])
+ sensitive = np.array([0, 0, 0, 1, 1, 1])
+ report = audit_decision_fairness(decisions, sensitive, group_names=["X", "Y"])
+ assert len(report.groups) == 2
+ assert report.groups[0].count == 3
+ assert report.groups[1].count == 3
+
+
+# ═══════════════════════════════════════════════════════════════
+# Export and summary tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestExport:
+ def test_export_creates_file(self, balanced_dataset, tmp_path):
+ from fairness_auditor import audit_dataset_bias, export_bias_report
+ y_true, sensitive = balanced_dataset
+ report = audit_dataset_bias(y_true, sensitive, group_names=["A", "B"])
+ path = export_bias_report(report, output_dir=str(tmp_path))
+ assert Path(path).exists()
+ data = json.loads(Path(path).read_text(encoding="utf-8"))
+ assert data["overall_severity"] == "pass"
+ assert data["source"] == "dataset"
+ assert len(data["fairness_results"]) == 2
+
+ def test_summary_contains_metrics(self, fair_predictions):
+ from fairness_auditor import audit_model_fairness, summary
+ y_true, y_pred, sensitive = fair_predictions
+ report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"])
+ text = summary(report)
+ assert "disparate_impact" in text
+ assert "equal_opportunity" in text
+
+ def test_summary_shows_severity(self, fair_predictions):
+ from fairness_auditor import audit_model_fairness, summary
+ y_true, y_pred, sensitive = fair_predictions
+ report = audit_model_fairness(y_true, y_pred, sensitive, group_names=["A", "B"])
+ text = summary(report)
+ assert "PASS" in text
diff --git a/runtime/tests/test_utils_i18n_taboo.py b/runtime/tests/test_utils_i18n_taboo.py
new file mode 100644
index 0000000..35fe864
--- /dev/null
+++ b/runtime/tests/test_utils_i18n_taboo.py
@@ -0,0 +1,359 @@
+# SPDX-License-Identifier: MIT
+"""Unit tests for i18n_checker.py Phase 5 — 神圣性与跨文化禁忌审计."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+_utils_dir = Path(__file__).resolve().parents[2] / "utils"
+if str(_utils_dir) not in sys.path:
+ sys.path.insert(0, str(_utils_dir))
+
+from i18n_checker import ( # noqa: E402
+ audit_sacred_contexts,
+ audit_taboo_colors,
+ audit_taboo_holidays,
+ audit_taboo_numbers,
+ audit_taboo_words,
+ run_taboo_audit,
+)
+
+# ═══════════════════════════════════════════════════════════════
+# audit_taboo_words
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditTabooWords:
+    """audit_taboo_words(text, locales): the result exposes "hits" and a "findings" list."""
+
+    def test_detects_chinese_political_taboo(self):
+        result = audit_taboo_words("台独主张", ["zh-CN"])
+        assert result["hits"] >= 1
+        finding = result["findings"][0]  # each finding carries "locale" and "severity"
+        assert finding["locale"] == "zh-CN"
+        assert finding["severity"] == "critical"
+
+    def test_detects_japanese_discrimination_term(self):
+        result = audit_taboo_words("気違いな行動", ["ja-JP"])
+        assert result["hits"] >= 1
+        assert any("気違い" in f["matched_word"] for f in result["findings"])  # "matched_word" names the hit
+
+    def test_detects_english_racial_slur(self):
+        result = audit_taboo_words("the nigger word", ["en-US"])
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_detects_german_nazi_taboo(self):
+        result = audit_taboo_words("Heil Hitler", ["de-DE"])
+        assert result["hits"] >= 1
+        assert result["findings"][0]["locale"] == "de-DE"
+
+    def test_detects_hindi_beef_taboo(self):
+        result = audit_taboo_words("beef meat", ["hi-IN"])
+        assert result["hits"] >= 1
+
+    def test_detects_thai_lese_majeste(self):
+        result = audit_taboo_words("หมิ่นพระบรมเดชานุภาพ", ["th-TH"])
+        assert result["hits"] >= 1
+        assert result["findings"][0]["severity"] == "critical"
+
+    def test_clean_text_returns_no_hits(self):
+        result = audit_taboo_words("hello world 你好", ["en-US", "zh-CN"])
+        assert result["hits"] == 0
+
+    def test_scan_all_locales_when_none_specified(self):
+        result = audit_taboo_words("beef sandwich")  # locales omitted: every locale is expected to be scanned
+        assert result["hits"] >= 1  # hi-IN: beef
+
+    def test_empty_text_returns_zero_hits(self):
+        result = audit_taboo_words("", ["zh-CN"])
+        assert result["hits"] == 0
+
+    def test_case_insensitive_matching(self):
+        result = audit_taboo_words("HEIL HITLER", ["de-DE"])  # upper-cased variant of the de-DE case above
+        assert result["hits"] >= 1
+
+    def test_multiple_locales_scan(self):
+        result = audit_taboo_words("台独 beef", ["zh-CN", "hi-IN"])
+        assert result["hits"] >= 2  # one hit expected per requested locale
+
+
+# ═══════════════════════════════════════════════════════════════
+# audit_taboo_colors
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditTabooColors:
+    """audit_taboo_colors(colors, locales): checks colour names against the cultural taboo matrix."""
+
+    def test_white_is_taboo_in_east_asia(self):
+        result = audit_taboo_colors(["white"], ["zh-CN"])
+        assert result["hits"] >= 1
+        assert any(f["color"] == "white" for f in result["findings"])  # findings echo the colour name
+
+    def test_white_is_taboo_in_japan(self):
+        result = audit_taboo_colors(["white"], ["ja-JP"])
+        assert result["hits"] >= 1
+
+    def test_white_is_taboo_in_india(self):
+        result = audit_taboo_colors(["white"], ["hi-IN"])
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_red_name_writing_taboo_in_korea(self):
+        result = audit_taboo_colors(["red"], ["ko-KR"])
+        assert result["hits"] >= 1
+        assert any("名字" in f["context"] or "name" in f["reason"].lower() for f in result["findings"])  # 名字 = "name"
+
+    def test_green_is_sacred_in_arabic(self):
+        result = audit_taboo_colors(["green"], ["ar-SA"])
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_safe_colors_return_no_hits(self):
+        result = audit_taboo_colors(["blue", "teal", "orange"], ["en-US"])
+        assert result["hits"] == 0
+
+    def test_mixed_locales_multiple_hits(self):
+        result = audit_taboo_colors(["white"], ["zh-CN", "hi-IN"])
+        assert result["hits"] >= 2  # at least one hit per requested locale
+
+    def test_case_insensitive_color_matching(self):
+        result = audit_taboo_colors(["WHITE", "Red"], ["zh-CN"])
+        assert result["hits"] >= 2  # white + red both taboo in zh-CN
+
+    def test_all_locales_scan(self):
+        result = audit_taboo_colors(["purple"])  # locales omitted: every locale is expected to be scanned
+        assert result["hits"] >= 2  # expected locales: th-TH, pt-BR, it-IT (only two hits are required here)
+
+
+# ═══════════════════════════════════════════════════════════════
+# audit_taboo_numbers
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditTabooNumbers:
+    """audit_taboo_numbers(numbers, locales): checks integers against the cultural taboo matrix."""
+
+    def test_4_is_taboo_in_chinese(self):
+        result = audit_taboo_numbers([4], ["zh-CN"])
+        assert result["hits"] >= 1
+        assert result["findings"][0]["matched_taboo"] == 4  # "matched_taboo" holds the taboo number that fired
+
+    def test_4_is_taboo_in_japanese(self):
+        result = audit_taboo_numbers([4], ["ja-JP"])
+        assert result["hits"] >= 1
+
+    def test_4_is_taboo_in_korean(self):
+        result = audit_taboo_numbers([4], ["ko-KR"])
+        assert result["hits"] >= 1
+
+    def test_13_is_taboo_western(self):
+        result = audit_taboo_numbers([13], ["en-US"])
+        assert result["hits"] >= 1
+        assert result["findings"][0]["matched_taboo"] == 13
+
+    def test_666_is_taboo_christian(self):
+        result = audit_taboo_numbers([666], ["en-US"])
+        assert result["hits"] >= 1
+        assert result["findings"][0]["severity"] == "high"
+
+    def test_8_in_funeral_context_is_taboo(self):
+        result = audit_taboo_numbers([8], ["zh-CN"])  # NOTE(review): no context is passed despite the test name
+        assert result["hits"] >= 1
+
+    def test_safe_numbers_return_no_hits(self):
+        result = audit_taboo_numbers([1, 2, 3, 5], ["en-US"])
+        assert result["hits"] == 0
+
+    def test_containment_matching_14_contains_4(self):
+        result = audit_taboo_numbers([14], ["zh-CN"])
+        # 14 contains 4 and also 14 is a separate taboo in zh-CN (only one hit is required below)
+        assert result["hits"] >= 1
+
+    def test_containment_matching_1401_contains_4_and_14(self):
+        result = audit_taboo_numbers([1401], ["zh-CN"])
+        assert result["hits"] >= 1  # 4 is in 1401, 14 is also
+
+    def test_multiple_numbers_multiple_locales(self):
+        result = audit_taboo_numbers([4, 13, 17], ["zh-CN", "en-US", "it-IT"])
+        assert result["hits"] >= 3  # presumably 4/zh-CN, 13/en-US, 17/it-IT — confirm against the matrix
+
+    def test_zero_is_taboo_in_red_envelope(self):
+        result = audit_taboo_numbers([0], ["zh-CN"])  # NOTE(review): no red-envelope context is passed either
+        assert result["hits"] >= 1
+
+
+# ═══════════════════════════════════════════════════════════════
+# audit_taboo_holidays
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditTabooHolidays:
+    """audit_taboo_holidays(date, locales): dates are passed as "MM-DD" strings."""
+
+    def test_qingming_date(self):
+        """Qingming Festival (April 4-5) should match."""
+        result = audit_taboo_holidays("04-05", ["zh-CN"])
+        assert result["hits"] >= 1
+        assert any("清明" in f["matched_period"] for f in result["findings"])  # 清明 = Qingming
+
+    def test_september_18_china(self):
+        """The September 18 (Mukden Incident) anniversary should match."""
+        result = audit_taboo_holidays("09-18", ["zh-CN"])
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_nanjing_massacre_day(self):
+        """Nanjing Massacre memorial day (December 13) should match."""
+        result = audit_taboo_holidays("12-13", ["zh-CN"])
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_hiroshima_day(self):
+        """Hiroshima atomic bombing anniversary (August 6) should match."""
+        result = audit_taboo_holidays("08-06", ["ja-JP"])
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_nagasaki_day(self):
+        """Nagasaki atomic bombing anniversary (August 9) should match."""
+        result = audit_taboo_holidays("08-09", ["ja-JP"])
+        assert result["hits"] >= 1
+
+    def test_sept_11_us(self):
+        """9/11 should match."""
+        result = audit_taboo_holidays("09-11", ["en-US"])
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_victory_day_russia(self):
+        """Victory Day (May 9) should match."""
+        result = audit_taboo_holidays("05-09", ["ru-RU"])
+        assert result["hits"] >= 1
+
+    def test_ordinary_day_returns_no_hits(self):
+        result = audit_taboo_holidays("03-15", ["zh-CN", "en-US"])  # a date expected in neither locale's table
+        assert result["hits"] == 0
+
+    def test_given_date_range_qingming(self):
+        """Qingming spans April 4-5, so April 4 should also match."""
+        result = audit_taboo_holidays("04-04", ["zh-CN"])
+        assert result["hits"] >= 1
+
+
+# ═══════════════════════════════════════════════════════════════
+# audit_sacred_contexts
+# ═══════════════════════════════════════════════════════════════
+
+class TestAuditSacredContexts:
+    """audit_sacred_contexts(context, locale): takes one locale string; "*" is used as a wildcard."""
+
+    def test_funeral_context_matches_global_rule(self):
+        result = audit_sacred_contexts("葬礼", "zh-CN")  # 葬礼 = funeral
+        assert result["hits"] >= 1
+
+    def test_children_context_matches_global_rule(self):
+        result = audit_sacred_contexts("儿童用户", "*")  # 儿童用户 = child users
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_tiananmen_context(self):
+        result = audit_sacred_contexts("天安门广场", "zh-CN")  # 天安门广场 = Tiananmen Square
+        assert result["hits"] >= 1
+
+    def test_mecca_context(self):
+        result = audit_sacred_contexts("麦加", "ar-SA")  # 麦加 = Mecca
+        assert result["hits"] >= 1
+        assert any(f["severity"] == "critical" for f in result["findings"])
+
+    def test_western_wall_context(self):
+        result = audit_sacred_contexts("哭墙", "he-IL")  # 哭墙 = Western Wall
+        assert result["hits"] >= 1
+
+    def test_irrelevant_context_returns_no_hits(self):
+        result = audit_sacred_contexts("咖啡店", "zh-CN")  # 咖啡店 = coffee shop
+        assert result["hits"] == 0
+
+    def test_global_wildcard_locale_includes_global_rules(self):
+        result = audit_sacred_contexts("宗教场所", "*")  # 宗教场所 = religious venue
+        assert result["hits"] >= 1
+
+    def test_bidirectional_matching(self):
+        result = audit_sacred_contexts("殡仪馆", "zh-CN")  # 殡仪馆 = funeral home; presumably a substring match
+        assert result["hits"] >= 1
+
+
+# ═══════════════════════════════════════════════════════════════
+# run_taboo_audit (combined entry point)
+# ═══════════════════════════════════════════════════════════════
+
+class TestRunTabooAudit:
+    """run_taboo_audit(payload): combined entry point that runs every taboo dimension on one dict."""
+
+    def test_full_payload_returns_all_dimensions(self):
+        payload = {
+            "text": "Hello world test",
+            "colors": ["white"],
+            "numbers": [4, 13],
+            "context": "宗教场所",
+            "locales": ["zh-CN", "en-US", "ar-SA"],
+        }
+        result = run_taboo_audit(payload)
+        assert "taboo_words" in result  # one result section per audited dimension
+        assert "taboo_colors" in result
+        assert "taboo_numbers" in result
+        assert "taboo_holidays" in result
+        assert "sacred_contexts" in result
+        assert "matrix_summary" in result
+        assert "supported_locales" in result
+        assert result["phase"] == 5
+        assert result["audit_name"] == "sacredness_cross_cultural_taboo"
+        assert result["total_hits"] > 0
+
+    def test_minimal_payload(self):
+        result = run_taboo_audit({})
+        assert "taboo_holidays" in result  # still runs with today's date
+        assert result["total_hits"] == result["taboo_holidays"]["hits"]  # date-proof: only today's date may hit
+
+    def test_text_only_payload(self):
+        result = run_taboo_audit({"text": "台独 nigger"})  # no locales given: all locales are scanned
+        assert result["taboo_words"]["hits"] >= 2
+
+    def test_total_hits_aggregates_correctly(self):
+        payload = {
+            "text": "beef台独",
+            "colors": ["white"],
+            "numbers": [4],
+            "locales": ["zh-CN", "hi-IN"],
+        }
+        result = run_taboo_audit(payload)
+        expected = (
+            result["taboo_words"]["hits"]
+            + result["taboo_colors"]["hits"]
+            + result["taboo_numbers"]["hits"]
+            + result["taboo_holidays"]["hits"]
+        )
+        assert result["total_hits"] == expected  # no "context" in the payload, so sacred_contexts is left out
+
+    def test_locale_filter_applied_to_all_dimensions(self):
+        payload = {
+            "text": "台独 beef nigger",
+            "colors": ["white", "green"],
+            "numbers": [4, 13],
+            "locales": ["zh-CN"],
+        }
+        result = run_taboo_audit(payload)
+        # Only zh-CN violations should register
+        for finding in result["taboo_words"]["findings"]:
+            assert finding["locale"] == "zh-CN"
+        for finding in result["taboo_colors"]["findings"]:
+            assert finding["locale"] == "zh-CN"
+        for finding in result["taboo_numbers"]["findings"]:
+            assert finding["locale"] == "zh-CN"
+
+    def test_sacred_context_uses_first_locale(self):
+        payload = {
+            "context": "殡仪馆",
+            "locales": ["zh-CN", "en-US"],
+        }
+        result = run_taboo_audit(payload)
+        assert result["sacred_contexts"]["hits"] >= 1
+        assert result["sacred_contexts"]["locale_filter"] == "zh-CN"  # first entry of "locales" wins
diff --git a/runtime/tests/test_utils_quality_gate.py b/runtime/tests/test_utils_quality_gate.py
new file mode 100644
index 0000000..d7f82fb
--- /dev/null
+++ b/runtime/tests/test_utils_quality_gate.py
@@ -0,0 +1,234 @@
+# SPDX-License-Identifier: MIT
+"""Unit tests for ci_quality_gate.py and quality_gate_engine.py."""
+
+from __future__ import annotations
+
+import json
+import sys
+import tempfile
+import xml.etree.ElementTree as ET
+from pathlib import Path
+
+# Ensure utils is importable
+_utils_dir = Path(__file__).resolve().parents[2] / "utils"
+if str(_utils_dir) not in sys.path:
+ sys.path.insert(0, str(_utils_dir))
+
+
+# ── ci_quality_gate tests ──────────────────────────────────────────────
+
+class TestParseJunit:  # parse_junit(path): reads a JUnit XML report into a dict of counts, or returns None
+    def make_junit_xml(self, tests: int, failures: int, errors: int, skipped: int) -> str:  # minimal <testsuite>
+        root = ET.Element("testsuite", {
+            "tests": str(tests),
+            "failures": str(failures),
+            "errors": str(errors),
+            "skipped": str(skipped),
+        })
+        return ET.tostring(root, encoding="unicode")  # encoding="unicode" yields str rather than bytes
+
+    def test_all_pass(self):
+        from ci_quality_gate import parse_junit
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:  # delete=False: kept after close
+            f.write(self.make_junit_xml(100, 0, 0, 0))
+            path = f.name
+        try:
+            result = parse_junit(path)
+            assert result is not None
+            assert result["total"] == 100
+            assert result["passed"] == 100
+            assert result["pass_rate_pct"] == 100.0
+        finally:
+            Path(path).unlink()  # manual cleanup, required because of delete=False
+
+    def test_mixed_failures(self):
+        from ci_quality_gate import parse_junit
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            f.write(self.make_junit_xml(50, 5, 2, 3))
+            path = f.name
+        try:
+            result = parse_junit(path)
+            assert result is not None
+            assert result["total"] == 50
+            assert result["failed"] == 7  # failures (5) + errors (2)
+            assert result["skipped"] == 3
+            assert result["passed"] == 40  # 50 total - 7 failed - 3 skipped
+            assert result["pass_rate_pct"] == 80.0  # 40 passed out of 50 total
+        finally:
+            Path(path).unlink()
+
+    def test_missing_file(self):
+        from ci_quality_gate import parse_junit
+        assert parse_junit("/nonexistent/path.xml") is None  # a missing file yields None, not an exception
+
+    def test_empty_file(self):
+        from ci_quality_gate import parse_junit
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            f.write("not xml")  # despite the test name the file is not empty: it holds unparseable text
+            path = f.name
+        try:
+            result = parse_junit(path)
+            assert result is None
+        finally:
+            Path(path).unlink()
+
+
+class TestCheckSmoke:  # check_smoke(path) -> (ok, msg), judged against GATES["smoke"]["min_pass_rate_pct"]
+    def test_pass(self, monkeypatch):
+        import ci_quality_gate as m
+        from ci_quality_gate import check_smoke
+        monkeypatch.setitem(m.GATES["smoke"], "min_pass_rate_pct", 95)  # restored on teardown, no global leak
+
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            root = ET.Element("testsuite", {"tests": "100", "failures": "3", "errors": "1", "skipped": "1"})
+            f.write(ET.tostring(root, encoding="unicode"))
+            path = f.name
+        try:
+            ok, msg = check_smoke(path)
+            assert ok  # presumably 95 of 100 passed, i.e. exactly at the threshold — confirm in check_smoke
+            assert "95" in msg
+        finally:
+            Path(path).unlink()  # NamedTemporaryFile(delete=False) must be removed by hand
+
+    def test_fail_below_threshold(self, monkeypatch):
+        import ci_quality_gate as m
+        from ci_quality_gate import check_smoke
+        monkeypatch.setitem(m.GATES["smoke"], "min_pass_rate_pct", 95)  # restored on teardown, no global leak
+
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            root = ET.Element("testsuite", {"tests": "100", "failures": "10", "errors": "5", "skipped": "0"})
+            f.write(ET.tostring(root, encoding="unicode"))
+            path = f.name
+        try:
+            ok, msg = check_smoke(path)
+            assert not ok  # 15 failures/errors out of 100 is well below the 95% threshold
+        finally:
+            Path(path).unlink()
+
+
+class TestCheckCoverage:  # check_coverage(path, threshold) -> (ok, msg), fed a Cobertura-style <coverage> root
+    def make_coverage_xml(self, line_rate: float) -> str:  # line_rate is a 0..1 fraction in these tests
+        root = ET.Element("coverage", {"line-rate": str(line_rate)})
+        return ET.tostring(root, encoding="unicode")
+
+    def test_pass_above_threshold(self):
+        from ci_quality_gate import check_coverage
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            f.write(self.make_coverage_xml(0.85))
+            path = f.name
+        try:
+            ok, msg = check_coverage(path, threshold=80.0)  # threshold is a percentage; 0.85 is expected to pass
+            assert ok
+        finally:
+            Path(path).unlink()  # manual cleanup, required because of delete=False
+
+    def test_fail_below_threshold(self):
+        from ci_quality_gate import check_coverage
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            f.write(self.make_coverage_xml(0.55))
+            path = f.name
+        try:
+            ok, msg = check_coverage(path, threshold=80.0)  # 0.55 is expected to fail the 80.0 threshold
+            assert not ok
+        finally:
+            Path(path).unlink()
+
+
+# ── quality_gate_engine tests ─────────────────────────────────────────
+
+class TestQualityGateEngine:  # QualityGateEngine: config-driven smoke / coverage / performance / release gates
+    def test_builtin_defaults_load(self):
+        from quality_gate_engine import _builtin_defaults
+        cfg = _builtin_defaults()
+        assert "smoke" in cfg
+        assert cfg["smoke"]["min_pass_rate_pct"] == 95
+        assert cfg["regression"]["min_coverage_pct"] == 80
+        assert cfg["performance_full"]["min_tps"] == 100
+
+    def test_engine_init_default(self):
+        from quality_gate_engine import QualityGateEngine
+        engine = QualityGateEngine(config_path="/nonexistent/config.yaml")  # missing file: config is still expected
+        assert "smoke" in engine.config
+
+    def test_engine_smoke_pass(self):
+        from quality_gate_engine import QualityGateEngine
+        engine = QualityGateEngine(config_path="/nonexistent/config.yaml")
+        engine.config["smoke"]["min_pass_rate_pct"] = 90  # NOTE(review): assumes config is per-instance — confirm
+
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            root = ET.Element("testsuite", {"tests": "100", "failures": "5", "errors": "0", "skipped": "0"})
+            f.write(ET.tostring(root, encoding="unicode"))
+            path = f.name
+        try:
+            ok, msg = engine.check_smoke(path)
+            assert ok  # 5 failures in 100 tests is expected to clear the 90% gate
+        finally:
+            Path(path).unlink()
+
+    def test_engine_smoke_fail(self):
+        from quality_gate_engine import QualityGateEngine
+        engine = QualityGateEngine(config_path="/nonexistent/config.yaml")
+        engine.config["smoke"]["min_pass_rate_pct"] = 95
+
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            root = ET.Element("testsuite", {"tests": "100", "failures": "40", "errors": "0", "skipped": "0"})
+            f.write(ET.tostring(root, encoding="unicode"))
+            path = f.name
+        try:
+            ok, msg = engine.check_smoke(path)
+            assert not ok  # 40 failures in 100 tests is expected to miss the 95% gate
+        finally:
+            Path(path).unlink()
+
+    def test_engine_coverage(self):
+        from quality_gate_engine import QualityGateEngine
+        engine = QualityGateEngine(config_path="/nonexistent/config.yaml")
+
+        with tempfile.NamedTemporaryFile(suffix=".xml", mode="w", delete=False) as f:
+            root = ET.Element("coverage", {"line-rate": "0.92"})
+            f.write(ET.tostring(root, encoding="unicode"))
+            path = f.name
+        try:
+            ok, msg = engine.check_coverage(path)  # no explicit threshold: presumably the configured one applies
+            assert ok
+        finally:
+            Path(path).unlink()
+
+    def test_engine_release_missing_gates(self):
+        from quality_gate_engine import QualityGateEngine
+        engine = QualityGateEngine(config_path="/nonexistent/config.yaml")
+        ok, msg = engine.check_release()  # no gate has been run or recorded on this fresh engine
+        assert not ok
+        assert "smoke" in msg.lower()
+
+    def test_engine_release_all_pass(self):
+        from quality_gate_engine import QualityGateEngine
+        engine = QualityGateEngine(config_path="/nonexistent/config.yaml")
+        engine.config["release"]["require_smoke"] = False  # switch off every prerequisite gate
+        engine.config["release"]["require_regression"] = False
+        engine.config["release"]["require_perf_full"] = False
+        ok, msg = engine.check_release()
+        assert ok
+
+    def test_engine_summary_json(self):
+        from quality_gate_engine import QualityGateEngine
+        engine = QualityGateEngine(config_path="/nonexistent/config.yaml")
+        engine._record("smoke", True, "ok")  # private helper called directly to seed one passing result
+        data = engine.summary_json()
+        assert data["overall_pass"] is True
+
+    def test_engine_performance_parse(self):
+        from quality_gate_engine import QualityGateEngine
+        engine = QualityGateEngine(config_path="/nonexistent/config.yaml")
+        engine.config["performance_ci_quick"] = {
+            "min_tps": 20, "max_p95_ms": 800, "max_avg_ms": 400, "max_error_pct": 1.0
+        }
+
+        with tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) as f:
+            json.dump({"tps": 30, "p95_ms": 400, "avg_ms": 200, "error_pct": 0.5}, f)  # within every limit above
+            path = f.name
+        try:
+            ok, msg = engine.check_performance(path, mode="ci_quick")  # presumably reads "performance_ci_quick"
+            assert ok
+        finally:
+            Path(path).unlink()
diff --git a/runtime/tests/test_utils_silent_failure.py b/runtime/tests/test_utils_silent_failure.py
new file mode 100644
index 0000000..000a5ed
--- /dev/null
+++ b/runtime/tests/test_utils_silent_failure.py
@@ -0,0 +1,238 @@
+# SPDX-License-Identifier: MIT
+"""Unit tests for silent_failure_detector.py — Phase 3.2 沉默故障检测."""
+
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+_utils_dir = Path(__file__).resolve().parents[2] / "utils"
+if str(_utils_dir) not in sys.path:
+ sys.path.insert(0, str(_utils_dir))
+
+
+# ═══════════════════════════════════════════════════════════════
+# Fixtures
+# ═══════════════════════════════════════════════════════════════
+
+@pytest.fixture
+def stable_data():
+    """Stable metric: 30 samples drawn around 100 (std 5) with no trend."""
+    rng = np.random.RandomState(42)  # fixed seed keeps the series reproducible
+    return list(rng.normal(100, 5, 30))
+
+
+@pytest.fixture
+def trending_up_data():
+    """Metric rising linearly from 100 to 180 over 30 points, toward the threshold of 200 used in tests."""
+    rng = np.random.RandomState(42)  # fixed seed keeps the series reproducible
+    base = np.linspace(100, 180, 30)
+    return list(base + rng.normal(0, 5, 30))  # zero-mean noise (std 5) on top of the ramp
+
+
+@pytest.fixture
+def breached_data():
+    """Metric that has crossed threshold 200 (mean ≈ 205, last points well above)."""
+    rng = np.random.RandomState(42)  # fixed seed keeps the series reproducible
+    base = np.linspace(140, 270, 30)  # linear ramp from 140 to 270 over 30 points
+    return list(base + rng.normal(0, 5, 30))  # zero-mean noise (std 5) on top of the ramp
+
+
+@pytest.fixture
+def declining_data():
+    """Pass rate falling from 0.95 to 0.81 over 30 points; tests pair it with threshold 0.80, direction "below"."""
+    rng = np.random.RandomState(42)  # fixed seed keeps the series reproducible
+    base = np.linspace(0.95, 0.81, 30)
+    return list(base + rng.normal(0, 0.02, 30))  # zero-mean noise (std 0.02) on top of the ramp
+
+
+# ═══════════════════════════════════════════════════════════════
+# Drift detection tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestDetectThresholdDrift:  # detect_threshold_drift plus its _mann_kendall / _linear_trend helpers
+    def test_stable_data_silent(self, stable_data):
+        from silent_failure_detector import detect_threshold_drift
+        r = detect_threshold_drift("test_metric", stable_data, threshold=200)
+        assert r.severity == "silent"
+        assert r.trend_pvalue is not None  # a p-value is reported even when nothing drifts
+
+    def test_trending_up_impending(self, trending_up_data):
+        from silent_failure_detector import detect_threshold_drift
+        r = detect_threshold_drift(
+            "latency_ms", trending_up_data, threshold=200,
+            drift_pct_threshold=0.10,
+        )
+        # Should be at least "impending" (close to threshold) or "silent" with trend
+        assert r.severity in ("silent", "impending")  # i.e. anything but "breached"
+        assert r.mean_shift_pct > 0
+
+    def test_breached_detected(self, breached_data):
+        from silent_failure_detector import detect_threshold_drift
+        r = detect_threshold_drift("error_rate", breached_data, threshold=200)
+        assert r.severity == "breached"
+
+    def test_direction_below(self, declining_data):
+        from silent_failure_detector import detect_threshold_drift
+        r = detect_threshold_drift(
+            "pass_rate", declining_data, threshold=0.80,
+            direction="below",
+        )
+        # Should detect the decline; NOTE(review): the severity check admits all three values listed
+        assert r.severity in ("silent", "impending", "breached")
+        assert r.current_mean < r.baseline_mean or r.trend_slope < 0  # the assertion that pins the decline
+
+    def test_insufficient_data(self):
+        from silent_failure_detector import detect_threshold_drift
+        r = detect_threshold_drift("sparse", [1.0, 2.0], threshold=10)  # only two points
+        assert "Insufficient" in r.recommendation
+
+    def test_baseline_points_used(self, trending_up_data):
+        from silent_failure_detector import detect_threshold_drift
+        rng = np.random.RandomState(42)
+        baseline = list(rng.normal(100, 3, 50))  # stable baseline
+        r = detect_threshold_drift(
+            "metric", trending_up_data, threshold=200,
+            baseline_points=baseline,
+        )
+        assert r.baseline_mean < 105  # baseline should be near 100
+
+    def test_mann_kendall_detects_trend(self, trending_up_data):
+        from silent_failure_detector import _mann_kendall
+        arr = np.asarray(trending_up_data)
+        p = _mann_kendall(arr)  # the return value is used as a p-value
+        assert p < 0.05  # strong upward trend
+
+    def test_mann_kendall_no_trend(self, stable_data):
+        from silent_failure_detector import _mann_kendall
+        arr = np.asarray(stable_data)
+        p = _mann_kendall(arr)
+        assert p > 0.01  # no significant trend (M-K noisy with n=30)
+
+    def test_linear_trend_slope(self, trending_up_data):
+        from silent_failure_detector import _linear_trend
+        arr = np.asarray(trending_up_data)
+        slope = _linear_trend(arr)
+        assert slope > 0  # upward slope
+
+
+# ═══════════════════════════════════════════════════════════════
+# Batch detection tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestBatchDetect:  # batch_detect(list of MetricConfig) -> report with overall severity and per-level counts
+    def test_batch_all_stable(self, stable_data):
+        from silent_failure_detector import MetricConfig, batch_detect
+        cfgs = [
+            MetricConfig("m1", "custom", stable_data, 200),  # positional args; presumably name, source, values, threshold
+            MetricConfig("m2", "custom", stable_data, 200),
+        ]
+        report = batch_detect(cfgs)
+        assert report.overall_severity == "pass"
+        assert report.silent_count == 2  # both metrics are classified "silent"
+
+    def test_batch_one_breached(self, stable_data, breached_data):
+        from silent_failure_detector import MetricConfig, batch_detect
+        cfgs = [
+            MetricConfig("stable", "custom", stable_data, 200),
+            MetricConfig("breached", "custom", breached_data, 200),
+        ]
+        report = batch_detect(cfgs)
+        assert report.overall_severity == "fail"  # one breached metric fails the whole batch
+        assert report.breached_count >= 1
+
+    def test_batch_one_impending(self, stable_data, trending_up_data):
+        from silent_failure_detector import MetricConfig, batch_detect
+        cfgs = [
+            MetricConfig("stable", "custom", stable_data, 200),
+            MetricConfig("trending", "custom", trending_up_data, 200),
+        ]
+        report = batch_detect(cfgs)
+        assert report.overall_severity in ("warning", "pass")  # only rules out "fail"
+
+
+# ═══════════════════════════════════════════════════════════════
+# Source-specific collector tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestSourceCollectors:  # collect_from_* wrappers: each result is checked for its "source" tag
+    def test_collect_from_tracing(self, trending_up_data):
+        from silent_failure_detector import collect_from_tracing
+        r = collect_from_tracing(trending_up_data, threshold_ms=200)
+        assert r.source == "tracing"
+        assert r.metric_name == "trace_duration_p95_ms"  # fixed metric name: the caller supplies none
+
+    def test_collect_from_web_vitals(self, trending_up_data):
+        from silent_failure_detector import collect_from_web_vitals
+        r = collect_from_web_vitals("LCP_ms", trending_up_data, threshold=4000)
+        assert r.source == "web_vitals"
+        assert "LCP_ms" in r.metric_name  # substring check: the name may carry a prefix or suffix
+
+    def test_collect_from_prometheus_counter(self, trending_up_data):
+        from silent_failure_detector import collect_from_prometheus_counter
+        r = collect_from_prometheus_counter("agent_errors", trending_up_data, threshold=10)
+        assert r.source == "prometheus"
+        assert "agent_errors" in r.metric_name
+
+    def test_collect_from_prometheus_gauge_below(self, declining_data):
+        from silent_failure_detector import collect_from_prometheus_gauge
+        r = collect_from_prometheus_gauge(
+            "pass_rate", declining_data, threshold=0.80, direction="below",
+        )
+        assert r.source == "prometheus"  # only the source tag is checked, not the severity
+
+
+# ═══════════════════════════════════════════════════════════════
+# Sliding window tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestSlidingWindow:  # SlidingWindowStore: bounded per-metric value buffer with push / get / get_all / clear
+    def test_push_and_get(self):
+        from silent_failure_detector import SlidingWindowStore
+        store = SlidingWindowStore(max_points=5)
+        for v in [1, 2, 3, 4, 5, 6, 7]:  # seven pushes into a window of five
+            store.push("latency", v)
+        vals = store.get("latency")
+        assert len(vals) == 5
+        assert vals == [3, 4, 5, 6, 7]  # the two oldest values were dropped
+
+    def test_get_all(self):
+        from silent_failure_detector import SlidingWindowStore
+        store = SlidingWindowStore()
+        store.push("a", 1)
+        store.push("a", 2)
+        store.push("b", 10)
+        all_data = store.get_all()
+        assert len(all_data) == 2  # one entry per metric name ("a" and "b")
+
+    def test_clear(self):
+        from silent_failure_detector import SlidingWindowStore
+        store = SlidingWindowStore()
+        store.push("x", 1)
+        store.clear("x")
+        assert store.get("x") == []  # a cleared metric reads back as an empty list
+
+
+# ═══════════════════════════════════════════════════════════════
+# Export tests
+# ═══════════════════════════════════════════════════════════════
+
+class TestExport:  # export_report (JSON on disk) and ci_summary (text) for a batch_detect report
+    def test_export_json(self, stable_data, tmp_path):
+        from silent_failure_detector import MetricConfig, batch_detect, export_report
+        report = batch_detect([MetricConfig("m1", "custom", stable_data, 200)])
+        path = export_report(report, output_dir=str(tmp_path))  # tmp_path: pytest per-test temp dir
+        assert Path(path).exists()
+        data = json.loads(Path(path).read_text(encoding="utf-8"))  # the export must parse as JSON
+        assert data["overall_severity"] == "pass"
+
+    def test_ci_summary(self, stable_data):
+        from silent_failure_detector import MetricConfig, batch_detect, ci_summary
+        report = batch_detect([MetricConfig("m1", "custom", stable_data, 200)])
+        text = ci_summary(report)
+        assert "PASS" in text  # overall verdict is expected in upper case
+        assert "silent" in text.lower()  # per-metric severity, matched case-insensitively
diff --git a/runtime/tests/test_utils_taboo_matrix.py b/runtime/tests/test_utils_taboo_matrix.py
new file mode 100644
index 0000000..0071c65
--- /dev/null
+++ b/runtime/tests/test_utils_taboo_matrix.py
@@ -0,0 +1,233 @@
+# SPDX-License-Identifier: MIT
+"""Unit tests for taboo_matrix.py — Phase 5 禁忌矩阵数据完整性."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+# Put <repo>/utils on sys.path (this file lives at runtime/tests/, so parents[2] is the repo root).
+_utils_dir = Path(__file__).resolve().parents[2] / "utils"
+if str(_utils_dir) not in sys.path:
+    sys.path.insert(0, str(_utils_dir))
+
+from taboo_matrix import ( # noqa: E402
+ SACRED_CONTEXTS,
+ TABOO_COLORS,
+ TABOO_HOLIDAYS,
+ TABOO_NUMBERS,
+ TABOO_WORDS,
+ get_matrix_summary,
+ get_sacred_contexts,
+ get_supported_locales,
+ get_taboo_colors,
+ get_taboo_holidays,
+ get_taboo_numbers,
+ get_taboo_words,
+)
+
+# ═══════════════════════════════════════════════════════════════
+# Data integrity
+# ═══════════════════════════════════════════════════════════════
+
+class TestDataIntegrity:
+ """All five data tables must be non-empty and well-formed."""
+
+ def test_taboo_words_not_empty(self):
+ assert len(TABOO_WORDS) >= 40, "Should have 40+ taboo words"
+
+ def test_taboo_colors_not_empty(self):
+ assert len(TABOO_COLORS) >= 15, "Should have 15+ taboo colors"
+
+ def test_taboo_numbers_not_empty(self):
+ assert len(TABOO_NUMBERS) >= 15, "Should have 15+ taboo numbers"
+
+ def test_taboo_holidays_not_empty(self):
+ assert len(TABOO_HOLIDAYS) >= 20, "Should have 20+ taboo holiday periods"
+
+ def test_sacred_contexts_not_empty(self):
+ assert len(SACRED_CONTEXTS) >= 10, "Should have 10+ sacred context rules"
+
+ def test_every_entry_has_required_fields(self):
+ for source, name in [
+ (TABOO_WORDS, "TABOO_WORDS"),
+ (TABOO_COLORS, "TABOO_COLORS"),
+ (TABOO_NUMBERS, "TABOO_NUMBERS"),
+ (TABOO_HOLIDAYS, "TABOO_HOLIDAYS"),
+ ]:
+ for i, entry in enumerate(source):
+ assert "locale" in entry, f"{name}[{i}] missing locale"
+ assert "severity" in entry, f"{name}[{i}] missing severity"
+ assert "reason" in entry, f"{name}[{i}] missing reason"
+
+ def test_no_empty_reason_strings(self):
+ for source, name in [
+ (TABOO_WORDS, "TABOO_WORDS"),
+ (TABOO_COLORS, "TABOO_COLORS"),
+ (TABOO_NUMBERS, "TABOO_NUMBERS"),
+ (TABOO_HOLIDAYS, "TABOO_HOLIDAYS"),
+ (SACRED_CONTEXTS, "SACRED_CONTEXTS"),
+ ]:
+ for i, entry in enumerate(source):
+ assert len(entry.get("reason", "")) >= 5, f"{name}[{i}] reason too short"
+
+ def test_all_severities_valid(self):
+ valid = {"critical", "high", "medium"}
+ for source, name in [
+ (TABOO_WORDS, "TABOO_WORDS"),
+ (TABOO_COLORS, "TABOO_COLORS"),
+ (TABOO_NUMBERS, "TABOO_NUMBERS"),
+ (TABOO_HOLIDAYS, "TABOO_HOLIDAYS"),
+ (SACRED_CONTEXTS, "SACRED_CONTEXTS"),
+ ]:
+ for i, entry in enumerate(source):
+ assert entry["severity"] in valid, f"{name}[{i}] severity={entry['severity']} not valid"
+
+
+# ═══════════════════════════════════════════════════════════════
+# Locale coverage
+# ═══════════════════════════════════════════════════════════════
+
+class TestLocaleCoverage:
+ """Must cover major world locales."""
+
+ def test_supported_locales(self):
+ locales = get_supported_locales()
+ assert len(locales) >= 14, f"Should cover 14+ locales, got {len(locales)}"
+ # Key locales must be present
+ assert "zh-CN" in locales
+ assert "ja-JP" in locales
+ assert "ar-SA" in locales
+ assert "en-US" in locales
+ assert "hi-IN" in locales
+
+ def test_every_taboo_word_has_known_locale(self):
+ known = set(get_supported_locales())
+ for entry in TABOO_WORDS:
+ assert entry["locale"] in known, f"Unknown locale {entry['locale']} in TABOO_WORDS"
+
+ def test_every_taboo_color_has_known_locale(self):
+ known = set(get_supported_locales())
+ for entry in TABOO_COLORS:
+ assert entry["locale"] in known, f"Unknown locale {entry['locale']} in TABOO_COLORS"
+
+ def test_every_taboo_number_has_known_locale(self):
+ known = set(get_supported_locales())
+ for entry in TABOO_NUMBERS:
+ assert entry["locale"] in known, f"Unknown locale {entry['locale']} in TABOO_NUMBERS"
+
+
+# ═══════════════════════════════════════════════════════════════
+# Query helpers
+# ═══════════════════════════════════════════════════════════════
+
+class TestQueryHelpers:
+ """get_* functions filter or return all."""
+
+ def test_get_taboo_words_all(self):
+ all_words = get_taboo_words()
+ assert len(all_words) == len(TABOO_WORDS)
+
+ def test_get_taboo_words_filtered(self):
+ zh_words = get_taboo_words("zh-CN")
+ assert all(w["locale"] == "zh-CN" for w in zh_words)
+ assert len(zh_words) >= 5
+
+ def test_get_taboo_words_unknown_locale_returns_empty(self):
+ assert get_taboo_words("xx-XX") == []
+
+ def test_get_taboo_colors_filtered(self):
+ ja_colors = get_taboo_colors("ja-JP")
+ assert all(c["locale"] == "ja-JP" for c in ja_colors)
+
+ def test_get_taboo_numbers_filtered(self):
+ zh_numbers = get_taboo_numbers("zh-CN")
+ assert len(zh_numbers) >= 3 # 4, 8, 7, 14, 0
+ assert any(n["number"] == 4 for n in zh_numbers)
+
+ def test_get_taboo_holidays_filtered(self):
+ us_holidays = get_taboo_holidays("en-US")
+ assert any("9月11日" in h["period"] or "Memorial" in h["period"] for h in us_holidays)
+
+ def test_get_sacred_contexts_global(self):
+ global_rules = get_sacred_contexts("*")
+ assert len(global_rules) >= 5 # global rules are locale="*"
+ # Should include global entries
+ assert any("儿童用户" in s["context"] for s in global_rules)
+
+ def test_get_sacred_contexts_locale_specific(self):
+ zh_rules = get_sacred_contexts("zh-CN")
+ # Should include both global (*) and zh-CN entries
+ assert any("天安门" in s["context"] for s in zh_rules)
+
+
+# ═══════════════════════════════════════════════════════════════
+# Matrix summary
+# ═══════════════════════════════════════════════════════════════
+
+class TestMatrixSummary:
+ """get_matrix_summary returns consistent statistics."""
+
+ def test_summary_counts_match_sources(self):
+ summary = get_matrix_summary()
+ assert summary["taboo_words"] == len(TABOO_WORDS)
+ assert summary["taboo_colors"] == len(TABOO_COLORS)
+ assert summary["taboo_numbers"] == len(TABOO_NUMBERS)
+ assert summary["taboo_holidays"] == len(TABOO_HOLIDAYS)
+ assert summary["sacred_contexts"] == len(SACRED_CONTEXTS)
+ assert summary["total_entries"] == (
+ len(TABOO_WORDS) + len(TABOO_COLORS) + len(TABOO_NUMBERS) +
+ len(TABOO_HOLIDAYS) + len(SACRED_CONTEXTS)
+ )
+
+ def test_locales_covered_positive(self):
+ summary = get_matrix_summary()
+ assert summary["locales_covered"] >= 14
+
+
+# ═══════════════════════════════════════════════════════════════
+# Specific content checks
+# ═══════════════════════════════════════════════════════════════
+
+class TestSpecificContent:
+ """Critical taboo entries must be present for key locales."""
+
+ def test_zh_CN_has_taiwan_sensitivity(self):
+ zh_words = get_taboo_words("zh-CN")
+ # 台独 should be present
+ assert any("台独" in w["word"] or "独" in w["contexts"] for w in zh_words), (
+ "zh-CN must cover Taiwan-related political sensitivity"
+ )
+
+ def test_ar_SA_has_islamic_taboos(self):
+ ar_words = get_taboo_words("ar-SA")
+ assert len(ar_words) >= 3, "ar-SA must have Islamic taboo words"
+
+ def test_ja_JP_has_burakumin(self):
+ ja_words = get_taboo_words("ja-JP")
+ assert any("部落" in w["word"] for w in ja_words), "ja-JP must cover burakumin"
+
+ def test_en_US_has_racial_slurs(self):
+ en_words = get_taboo_words("en-US")
+ assert len(en_words) >= 4, "en-US must cover racial slur taboos"
+
+ def test_number_4_is_taboo_east_asia(self):
+ for locale in ["zh-CN", "ja-JP", "ko-KR"]:
+ nums = get_taboo_numbers(locale)
+ assert any(n["number"] == 4 for n in nums), f"{locale} must have 4 as taboo"
+
+ def test_number_13_is_taboo_western(self):
+ for locale in ["en-US", "en-GB"]:
+ nums = get_taboo_numbers(locale)
+ assert any(n["number"] == 13 for n in nums), f"{locale} must have 13 as taboo"
+
+ def test_holocaust_taboo_words(self):
+ he_words = get_taboo_words("he-IL")
+ de_words = get_taboo_words("de-DE")
+ assert any("שואה" in w["word"] for w in he_words), "he-IL must cover Holocaust"
+ assert any("Hitler" in w["word"] for w in de_words), "de-DE must cover Nazi references"
+
+ def test_ramadan_coverage(self):
+ ar_holidays = get_taboo_holidays("ar-SA")
+ assert any("Ramadan" in h["period"] or "斋月" in h["period"] for h in ar_holidays), (
+ "ar-SA must cover Ramadan"
+ )
diff --git a/runtime/tutor/eval_replay.py b/runtime/tutor/eval_replay.py
index 3cd41a1..e339ccc 100644
--- a/runtime/tutor/eval_replay.py
+++ b/runtime/tutor/eval_replay.py
@@ -20,8 +20,6 @@
from pathlib import Path
from typing import Any
-from loguru import logger
-
from runtime.config.settings import get_settings
diff --git a/runtime/tutor/explainer.py b/runtime/tutor/explainer.py
index 503963d..c5a5f19 100644
--- a/runtime/tutor/explainer.py
+++ b/runtime/tutor/explainer.py
@@ -11,11 +11,10 @@
from __future__ import annotations
from dataclasses import dataclass, field
-from typing import Any
from loguru import logger
-from runtime.tutor.i18n import card_text, get_lang, t
+from runtime.tutor.i18n import get_lang, t
from runtime.tutor.theory_kb import get_kb
from runtime.tutor.verbosity import Mode, get_mode
diff --git a/runtime/tutor/graph.py b/runtime/tutor/graph.py
index ad958c0..527aad6 100644
--- a/runtime/tutor/graph.py
+++ b/runtime/tutor/graph.py
@@ -18,8 +18,9 @@
import re
from collections import defaultdict
+from collections.abc import Iterable
from dataclasses import dataclass, field
-from typing import Iterable, Literal
+from typing import Literal
from loguru import logger
diff --git a/skills/README.md b/skills/README.md
new file mode 100644
index 0000000..bc9041c
--- /dev/null
+++ b/skills/README.md
@@ -0,0 +1,104 @@
+# skills 索引
+
+**32 业务 Skill + 3 元 Skill**。
+
+业务 skill 按领域分:通用流程 8 + 平台专项 5 + 渗透安全 7 + 车载 5 + ECC 测试加固 6 + 探索学习 1 = 32。
+元 skill 3 个子目录:`darwin-skill/` `karpathy-guidelines/` `nuwa-skill/` — 用法见各子目录 SKILL.md。
+
+顶层导航见根目录 [00-项目导航.md](../00-项目导航.md);路线图见 [ROADMAP.md](../ROADMAP.md)。
+
+---
+
+## 类别 1:通用流程 8 Skill
+
+| Skill | 文件 | 用途 | 触发示例 |
+|-------|-----|------|---------|
+| `/smoke-test` | `smoke-test.md` | P0 冒烟(≥95% 门禁,11min 上限) | 上线前快速验证 |
+| `/test-coordinator` | `test-coordinator.md` | 完整流程编排(自动平台路由) | 新功能完整测试 |
+| `/regression-test` | `regression-test.md` | P0+P1 回归 + Flaky + JMeter | 迭代发布前 |
+| `/testcase-design` | `testcase-design.md` | 4 Sheet Excel 用例 | 评审前 / 手测前 |
+| `/python-script-gen` | `python-script-gen.md` | pytest UI/API 脚本生成 | 用例转自动化 |
+| `/jmeter-script-gen` | `jmeter-script-gen.md` | JMeter JMX 性能计划(双模式 ci_quick/full) | 性能测试 |
+| `/data-preparation` | `data-preparation.md` | 测试数据 + JMeter CSV 生成 | 测试前数据准备 |
+| `/zentao-bug-submission` | `zentao-bug-submission.md` | BugTracker 规范提交(默认禅道示例,可换 Jira/GitHub/GitLab/Linear/Webhook,见 `BUG_TRACKER` env) | 失败用例后 |
+
+---
+
+## 类别 2:平台专项 5 Skill(按产品形态选)
+
+| Skill | 文件 | 平台 | 必装外部依赖 |
+|-------|-----|------|-------------|
+| `/mobile-test` | `mobile-test.md` | Android / iOS / 微信/支付宝小程序 | Appium server / Android SDK / Xcode / 微信开发者工具 |
+| `/desktop-test` | `desktop-test.md` | Windows EXE / macOS .app / Linux GUI / Electron | pywinauto(Win) / pyautogui / Playwright |
+| `/visual-test` | `visual-test.md` | 游戏 / Canvas / WebGL / OCR / 视觉回归 | Airtest / Tesseract / OpenCV |
+| `/system-test` | `system-test.md` | IoT / 音视频 / 链路追踪 / 消息队列 | FFmpeg / Jaeger / Kafka 或 RabbitMQ |
+| `/ai-test` | `ai-test.md` | AI/ML 模型 / LLM 应用 | 推理服务 endpoint / LLM API |
+
+---
+
+
+## 类别 3:渗透安全 7 Skill
+
+| Skill | 文件 | 用途 | 触发示例 |
+|-------|-----|------|---------|
+| `/pentest-coordinator` | `pentest-coordinator.md` | 渗透测试总协调(自动路由子 skill) | 安全测试启动 |
+| `/pentest-recon` | `pentest-recon.md` | 信息收集与资产侦察 | 渗透前信息收集 |
+| `/pentest-vuln` | `pentest-vuln.md` | 漏洞扫描与验证 | 自动化漏洞检测 |
+| `/pentest-exploit` | `pentest-exploit.md` | 漏洞利用与 PoC 验证 | 漏洞复现 |
+| `/pentest-web` | `pentest-web.md` | Web 应用渗透(OWASP Top 10) | Web 安全测试 |
+| `/pentest-api` | `pentest-api.md` | API 渗透测试(JWT/OAuth/GraphQL) | API 安全测试 |
+| `/pentest-report` | `pentest-report.md` | 渗透测试报告生成 | 安全评估输出 |
+
+## 类别 4:车载 5 Skill
+
+| Skill | 文件 | 用途 | 触发示例 |
+|-------|-----|------|---------|
+| `/automotive-test` | `automotive-test.md` | 车载测试总协调 | 车载系统测试 |
+| `/automotive-can-bus-test` | `automotive-can-bus-test.md` | CAN 总线协议测试 | CAN 报文验证 |
+| `/automotive-adas-scenario` | `automotive-adas-scenario.md` | ADAS 场景测试 | 辅助驾驶验证 |
+| `/automotive-hil-loop-test` | `automotive-hil-loop-test.md` | HIL 硬件在环测试 | 硬件在环验证 |
+| `/automotive-ota-update-test` | `automotive-ota-update-test.md` | OTA 升级测试 | 远程升级验证 |
+
+## 类别 5:ECC 测试加固 6 Skill
+
+| Skill | 文件 | 用途 | 触发示例 |
+|-------|-----|------|---------|
+| `/tdd-workflow` | `tdd-workflow.md` | 测试驱动开发工作流 | 新功能开发 |
+| `/e2e-testing` | `e2e-testing.md` | 端到端测试(Playwright) | 关键用户流程 |
+| `/verification-loop` | `verification-loop.md` | 验证循环(自检+修复) | 持续质量检查 |
+| `/eval-harness` | `eval-harness.md` | 评估框架(LLM-as-judge) | AI 输出质量评估 |
+| `/security-review` | `security-review.md` | 安全代码审查 | 代码提交前安全检查 |
+| `/agent-introspection-debugging` | `agent-introspection-debugging.md` | Agent 自省调试 | Agent 行为异常排查 |
+
+## 类别 6:探索学习 1 Skill
+
+| Skill | 文件 | 用途 | 触发示例 |
+|-------|-----|------|---------|
+| `/build-your-own-x-explorer` | `build-your-own-x-explorer.md` | 探索式学习(BYO-X 框架) | 新技术评估 / 实验 |
+
+## 元 Skill 3 个(子目录)
+
+| 元 Skill | 目录 | 用途 |
+|----------|------|------|
+| `darwin-skill` | `darwin-skill/` | Skill 自进化棘轮优化 |
+| `karpathy-guidelines` | `karpathy-guidelines/` | Karpathy 编码纪律注入 |
+| `nuwa-skill` | `nuwa-skill/` | 女娲:人物思维框架蒸馏 |
+
+---
+## 每个 Skill 文件结构
+
+每个 skill 文件统一包含以下章节:
+
+1. **YAML frontmatter**(name / description / tools)
+2. **🔔 开测前准备清单**(平台 skill 含此段,列必备 + 可选项)
+3. **触发方式**(`/skill-name`)
+4. **适用场景**
+5. **执行流程**(Step 1, 2, 3...)
+6. **质量门禁**
+7. **输出文件**
+
+---
+
+## 添加新 Skill
+
+详见根目录 [`CONTRIBUTING.md`](../CONTRIBUTING.md) "添加新 Skill" 章节。
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/agent-introspection-debugging.md" b/skills/agent-introspection-debugging.md
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/agent-introspection-debugging.md"
rename to skills/agent-introspection-debugging.md
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/ai-test.md" b/skills/ai-test.md
similarity index 94%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/ai-test.md"
rename to skills/ai-test.md
index d41c321..8b1e705 100644
--- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/ai-test.md"
+++ b/skills/ai-test.md
@@ -21,7 +21,7 @@ SKILL_IMPL_STATUS: script
□ 黄金测试集 CSV(含 input + label 列)→ AI_GOLDEN_TEST_SET
□ 模型版本号 → AI_MODEL_VERSION(Bug 报告 buildFound)
□ 漂移基线数据(可选)→ AI_DRIFT_BASELINE
-□ 公平性测试集(含敏感属性列,如 gender)
+□ 公平性测试集(含敏感属性列,如 gender / race)+ fairness_auditor.py 偏见审计
□ LLM 测试用例 yaml → workspace/自动化脚本/python/ai/prompts/llm_eval_cases.yaml
```
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-adas-scenario.md" b/skills/automotive-adas-scenario.md
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-adas-scenario.md"
rename to skills/automotive-adas-scenario.md
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-can-bus-test.md" b/skills/automotive-can-bus-test.md
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-can-bus-test.md"
rename to skills/automotive-can-bus-test.md
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-hil-loop-test.md" b/skills/automotive-hil-loop-test.md
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-hil-loop-test.md"
rename to skills/automotive-hil-loop-test.md
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-ota-update-test.md" b/skills/automotive-ota-update-test.md
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-ota-update-test.md"
rename to skills/automotive-ota-update-test.md
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-test.md" b/skills/automotive-test.md
similarity index 93%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-test.md"
rename to skills/automotive-test.md
index 163a04e..7855f8f 100644
--- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/automotive-test.md"
+++ b/skills/automotive-test.md
@@ -26,7 +26,7 @@ SKILL_IMPL_STATUS: production
| 6 总线 CAN/LIN/FlexRay/Eth | `/automotive-can-bus-test` |
| 7 ADAS 场景 | `/automotive-adas-scenario` |
| 8 OTA 升级 | `/automotive-ota-update-test` |
-| 9 合规审计 | `mcp-compliance-checker iso-26262/sotif/r155/r156` |
+| 9 合规审计 | `compliance/engine.py` + 行业规则库(ISO 26262/SOTIF/R155/R156 Phase 2) |
| 10 报告 + Bug 单 | `report-generator` |
## 主宪章铁律
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/build-your-own-x-explorer.md" b/skills/build-your-own-x-explorer.md
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/build-your-own-x-explorer.md"
rename to skills/build-your-own-x-explorer.md
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/LICENSE" b/skills/darwin-skill/LICENSE
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/LICENSE"
rename to skills/darwin-skill/LICENSE
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/SKILL.md" b/skills/darwin-skill/SKILL.md
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/SKILL.md"
rename to skills/darwin-skill/SKILL.md
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/aso-hero.png" b/skills/darwin-skill/assets/aso-hero.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/aso-hero.png"
rename to skills/darwin-skill/assets/aso-hero.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/banner-check.png" b/skills/darwin-skill/assets/banner-check.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/banner-check.png"
rename to skills/darwin-skill/assets/banner-check.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/banner-en-check.png" b/skills/darwin-skill/assets/banner-en-check.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/banner-en-check.png"
rename to skills/darwin-skill/assets/banner-en-check.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/banner-en.svg" b/skills/darwin-skill/assets/banner-en.svg
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/banner-en.svg"
rename to skills/darwin-skill/assets/banner-en.svg
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/banner.svg" b/skills/darwin-skill/assets/banner.svg
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/banner.svg"
rename to skills/darwin-skill/assets/banner.svg
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-loop-en.html" b/skills/darwin-skill/assets/chart-loop-en.html
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-loop-en.html"
rename to skills/darwin-skill/assets/chart-loop-en.html
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-loop-en.png" b/skills/darwin-skill/assets/chart-loop-en.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-loop-en.png"
rename to skills/darwin-skill/assets/chart-loop-en.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-loop.html" b/skills/darwin-skill/assets/chart-loop.html
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-loop.html"
rename to skills/darwin-skill/assets/chart-loop.html
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-loop.png" b/skills/darwin-skill/assets/chart-loop.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-loop.png"
rename to skills/darwin-skill/assets/chart-loop.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-phases-en.html" b/skills/darwin-skill/assets/chart-phases-en.html
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-phases-en.html"
rename to skills/darwin-skill/assets/chart-phases-en.html
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-phases-en.png" b/skills/darwin-skill/assets/chart-phases-en.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-phases-en.png"
rename to skills/darwin-skill/assets/chart-phases-en.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-phases.html" b/skills/darwin-skill/assets/chart-phases.html
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-phases.html"
rename to skills/darwin-skill/assets/chart-phases.html
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-phases.png" b/skills/darwin-skill/assets/chart-phases.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-phases.png"
rename to skills/darwin-skill/assets/chart-phases.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-ratchet-en.html" b/skills/darwin-skill/assets/chart-ratchet-en.html
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-ratchet-en.html"
rename to skills/darwin-skill/assets/chart-ratchet-en.html
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-ratchet-en.png" b/skills/darwin-skill/assets/chart-ratchet-en.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-ratchet-en.png"
rename to skills/darwin-skill/assets/chart-ratchet-en.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-ratchet.html" b/skills/darwin-skill/assets/chart-ratchet.html
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-ratchet.html"
rename to skills/darwin-skill/assets/chart-ratchet.html
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-ratchet.png" b/skills/darwin-skill/assets/chart-ratchet.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-ratchet.png"
rename to skills/darwin-skill/assets/chart-ratchet.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-rubric-en.html" b/skills/darwin-skill/assets/chart-rubric-en.html
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-rubric-en.html"
rename to skills/darwin-skill/assets/chart-rubric-en.html
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-rubric-en.png" b/skills/darwin-skill/assets/chart-rubric-en.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-rubric-en.png"
rename to skills/darwin-skill/assets/chart-rubric-en.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-rubric.html" b/skills/darwin-skill/assets/chart-rubric.html
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-rubric.html"
rename to skills/darwin-skill/assets/chart-rubric.html
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-rubric.png" b/skills/darwin-skill/assets/chart-rubric.png
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/chart-rubric.png"
rename to skills/darwin-skill/assets/chart-rubric.png
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/hero.gif" b/skills/darwin-skill/assets/hero.gif
similarity index 100%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/assets/hero.gif"
rename to skills/darwin-skill/assets/hero.gif
diff --git "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/docs/index.html" b/skills/darwin-skill/docs/index.html
similarity index 99%
rename from "03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/docs/index.html"
rename to skills/darwin-skill/docs/index.html
index 2854eea..9b70d98 100644
--- "a/03-\346\212\200\350\203\275\345\256\232\344\271\211/darwin-skill/docs/index.html"
+++ b/skills/darwin-skill/docs/index.html
@@ -1056,4 +1056,4 @@ 概念映射