From 8d744f334fdd65de4bddd2fedeb74c1ee3e4c1a9 Mon Sep 17 00:00:00 2001 From: WellDunDun <45949032+WellDunDun@users.noreply.github.com> Date: Sat, 21 Mar 2026 11:50:19 +0300 Subject: [PATCH 01/10] Improve sandbox onboarding and alpha auth UX --- Makefile | 14 +- cli/selftune/auth/device-code.ts | 32 +++ cli/selftune/index.ts | 18 +- cli/selftune/init.ts | 25 ++- docs/design-docs/sandbox-claude-code.md | 32 ++- tests/init/device-code.test.ts | 15 ++ tests/sandbox/docker/entrypoint.sh | 9 +- tests/sandbox/fixtures/claude-settings.json | 59 ++++- tests/sandbox/run-install-sandbox.ts | 231 ++++++++++++++++++++ tests/sandbox/run-sandbox.ts | 64 ++++-- 10 files changed, 450 insertions(+), 49 deletions(-) create mode 100644 tests/sandbox/run-install-sandbox.ts diff --git a/Makefile b/Makefile index 4c00bc66..71bd3359 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: all clean lint test test-fast test-slow check typecheck-dashboard sandbox sandbox-llm sandbox-shell sandbox-openclaw sandbox-openclaw-keep sandbox-openclaw-clean clean-branches +.PHONY: all clean lint test test-fast test-slow check typecheck-dashboard sandbox sandbox-install sandbox-llm sandbox-shell sandbox-shell-empty sandbox-reset sandbox-openclaw sandbox-openclaw-keep sandbox-openclaw-clean clean-branches all: check @@ -24,11 +24,21 @@ test-slow: sandbox: bun run tests/sandbox/run-sandbox.ts +sandbox-install: + bun run tests/sandbox/run-install-sandbox.ts + sandbox-llm: docker compose -f tests/sandbox/docker/docker-compose.yml up --build sandbox-shell: - docker compose -f tests/sandbox/docker/docker-compose.yml run --build --entrypoint bash selftune-sandbox + docker compose -f tests/sandbox/docker/docker-compose.yml run --build selftune-sandbox bash + +sandbox-shell-empty: + docker compose -f tests/sandbox/docker/docker-compose.yml run --build -e SKIP_PROVISION=1 selftune-sandbox bash + +sandbox-reset: + -docker ps -aq --filter label=com.docker.compose.project=docker --filter 
label=com.docker.compose.service=selftune-sandbox | xargs docker rm -f + docker compose -f tests/sandbox/docker/docker-compose.yml down -v sandbox-openclaw: docker compose -f tests/sandbox/docker/docker-compose.openclaw.yml up --build diff --git a/cli/selftune/auth/device-code.ts b/cli/selftune/auth/device-code.ts index 765fedb1..0ce2e02b 100644 --- a/cli/selftune/auth/device-code.ts +++ b/cli/selftune/auth/device-code.ts @@ -22,6 +22,38 @@ export interface DeviceCodeResult { org_id: string; } +export function tryOpenUrl(url: string): boolean { + const command = + process.platform === "darwin" + ? ["open", url] + : process.platform === "linux" + ? ["xdg-open", url] + : process.platform === "win32" + ? ["cmd", "/c", "start", "", url] + : null; + + if (!command) return false; + if (process.platform !== "win32" && !Bun.which(command[0])) return false; + + try { + Bun.spawn(command, { stdout: "ignore", stderr: "ignore" }); + return true; + } catch { + return false; + } +} + +export function buildVerificationUrl(verificationUrl: string, userCode: string): string { + try { + const url = new URL(verificationUrl); + url.searchParams.set("code", userCode); + return url.toString(); + } catch { + const separator = verificationUrl.includes("?") ? "&" : "?"; + return `${verificationUrl}${separator}code=${encodeURIComponent(userCode)}`; + } +} + /** * Derive the cloud API base URL from SELFTUNE_ALPHA_ENDPOINT. * The endpoint is the push URL (e.g., https://api.selftune.dev/api/v1/push). 
diff --git a/cli/selftune/index.ts b/cli/selftune/index.ts index e798c8aa..73634592 100644 --- a/cli/selftune/index.ts +++ b/cli/selftune/index.ts @@ -673,33 +673,37 @@ Output: const { readAlphaIdentity, writeAlphaIdentity, generateUserId } = await import( "./alpha-identity.js" ); - const { requestDeviceCode, pollDeviceCode } = await import("./auth/device-code.js"); + const { buildVerificationUrl, pollDeviceCode, requestDeviceCode, tryOpenUrl } = + await import("./auth/device-code.js"); const { chmodSync } = await import("node:fs"); const existingIdentity = readAlphaIdentity(SELFTUNE_CONFIG_PATH); process.stderr.write("[alpha relink] Starting device-code authentication flow...\n"); const grant = await requestDeviceCode(); + const verificationUrlWithCode = buildVerificationUrl( + grant.verification_url, + grant.user_code, + ); console.log( JSON.stringify({ level: "info", code: "device_code_issued", verification_url: grant.verification_url, + verification_url_with_code: verificationUrlWithCode, user_code: grant.user_code, expires_in: grant.expires_in, - message: `Open ${grant.verification_url} and enter code: ${grant.user_code}`, + message: `Open ${verificationUrlWithCode} to approve.`, }), ); // Try to open browser - try { - const url = `${grant.verification_url}?code=${grant.user_code}`; - Bun.spawn(["open", url], { stdout: "ignore", stderr: "ignore" }); + if (tryOpenUrl(verificationUrlWithCode)) { process.stderr.write("[alpha relink] Browser opened. Waiting for approval...\n"); - } catch { + } else { process.stderr.write( - "[alpha relink] Could not open browser. Visit the URL above manually.\n", + `[alpha relink] Could not open browser. 
Visit ${verificationUrlWithCode} manually.\n`, ); } diff --git a/cli/selftune/init.ts b/cli/selftune/init.ts index 92886832..8d8744e7 100644 --- a/cli/selftune/init.ts +++ b/cli/selftune/init.ts @@ -35,7 +35,12 @@ import { readAlphaIdentity, } from "./alpha-identity.js"; import { TELEMETRY_NOTICE } from "./analytics.js"; -import { pollDeviceCode, requestDeviceCode } from "./auth/device-code.js"; +import { + buildVerificationUrl, + pollDeviceCode, + requestDeviceCode, + tryOpenUrl, +} from "./auth/device-code.js"; import { CLAUDE_CODE_HOOK_KEYS, SELFTUNE_CONFIG_DIR, SELFTUNE_CONFIG_PATH } from "./constants.js"; import type { AgentCommandGuidance, AlphaIdentity, SelftuneConfig } from "./types.js"; import { hookKeyHasSelftuneEntry } from "./utils/hooks.js"; @@ -548,6 +553,7 @@ export async function runInit(opts: InitOptions): Promise { process.stderr.write("[alpha] Starting device-code authentication flow...\n"); const grant = await requestDeviceCode(); + const verificationUrlWithCode = buildVerificationUrl(grant.verification_url, grant.user_code); // Emit structured JSON for the agent to parse console.log( @@ -555,25 +561,24 @@ export async function runInit(opts: InitOptions): Promise { level: "info", code: "device_code_issued", verification_url: grant.verification_url, + verification_url_with_code: verificationUrlWithCode, user_code: grant.user_code, expires_in: grant.expires_in, - message: `Open ${grant.verification_url} and enter code: ${grant.user_code}`, + message: `Open ${verificationUrlWithCode} to approve.`, }), ); // Try to open browser (skip in test environments) if (!process.env.BUN_ENV?.includes("test") && !process.env.SELFTUNE_NO_BROWSER) { - try { - const url = `${grant.verification_url}?code=${grant.user_code}`; - Bun.spawn(["open", url], { stdout: "ignore", stderr: "ignore" }); + if (tryOpenUrl(verificationUrlWithCode)) { process.stderr.write(`[alpha] Browser opened. 
Waiting for approval...\n`); - } catch { - process.stderr.write(`[alpha] Could not open browser. Visit the URL above manually.\n`); + } else { + process.stderr.write( + `[alpha] Could not open browser. Visit ${verificationUrlWithCode} manually.\n`, + ); } } else { - process.stderr.write( - `[alpha] Visit ${grant.verification_url}?code=${grant.user_code} to approve.\n`, - ); + process.stderr.write(`[alpha] Visit ${verificationUrlWithCode} to approve.\n`); } process.stderr.write("[alpha] Polling"); diff --git a/docs/design-docs/sandbox-claude-code.md b/docs/design-docs/sandbox-claude-code.md index ced7c7ae..34244cf9 100644 --- a/docs/design-docs/sandbox-claude-code.md +++ b/docs/design-docs/sandbox-claude-code.md @@ -19,18 +19,30 @@ Claude Code-specific sandbox configuration, tests, and Docker container. See [sa | Command | Expected Behavior | |---------|-------------------| | `doctor` | Config + logs validated, hooks detected in settings.json | -| `evals --skill find-skills` | 6 positives, 24 negatives generated | -| `evals --skill frontend-design` | 0 positives (correctly identifies undertriggering) | +| `eval generate --skill find-skills` | 6 positives, 24 negatives generated | +| `eval generate --skill frontend-design` | 0 positives (correctly identifies undertriggering) | | `status` | Colored table with per-skill health | | `last` | Latest session insight with unmatched queries | | `dashboard --export` | Standalone HTML with embedded data | | `contribute --preview` | Sanitized contribution bundle | | Hook: prompt-log | Record appended to all_queries_log.jsonl | -| Hook: skill-eval | Record appended to skill_usage_log.jsonl | +| Hook: skill-eval | `skill_invocation` appended to canonical log / SQLite | | Hook: session-stop | Record appended to session_telemetry_log.jsonl | **Performance:** 10 tests in ~400ms. 
+### Empty-State Install Test + +`make sandbox-install` starts from a blank HOME and validates the real setup path: + +- `selftune init --agent claude_code --cli-path --force` +- config file created under `~/.selftune/config.json` +- Claude hooks installed into `~/.claude/settings.json` +- `selftune doctor` reports `hook_settings` as passing +- re-running `selftune init` without `--force` is idempotent + +This complements `make sandbox`, which is still a seeded smoke test against fixture data. + ## Layer 2: Devcontainer + `claude -p` **Location:** `tests/sandbox/docker/` (Docker files) @@ -98,9 +110,12 @@ These are candidates for future test expansion. ```bash # Layer 1: Local (free, fast) make sandbox +make sandbox-install # Layer 2: First-time auth setup (one-time) -make sandbox-shell # drop into container +make sandbox-reset # clear persisted Docker sandbox HOME if needed +make sandbox-shell # drop into provisioned container +make sandbox-shell-empty # drop into blank "white room" container claude login # paste token, then exit # Layer 2: Run LLM tests (auth persists in Docker volume) @@ -109,10 +124,19 @@ make sandbox-llm # Interactive container access make sandbox-shell +# White-room manual onboarding +make sandbox-reset +make sandbox-shell-empty + # Full check: lint + unit tests + sandbox make check ``` +Use `sandbox-shell-empty` when you want a blank Claude/selftune state and plan +to install skills manually, then tell Claude "setup selftune" and observe the +actual onboarding path. Use `sandbox-shell` when you want the preseeded fixture +environment for repeatable functional checks. + **Auth options:** `claude login` inside the container (persists in Docker volume), `ANTHROPIC_API_KEY` in `.env.local`, or VS Code devcontainer. 
## Future Work diff --git a/tests/init/device-code.test.ts b/tests/init/device-code.test.ts index e8b1d7eb..431ace57 100644 --- a/tests/init/device-code.test.ts +++ b/tests/init/device-code.test.ts @@ -8,6 +8,7 @@ import { afterEach, describe, expect, it } from "bun:test"; import { + buildVerificationUrl, getBaseUrl, pollDeviceCode, requestDeviceCode, @@ -53,6 +54,20 @@ describe("getBaseUrl", () => { }); }); +describe("buildVerificationUrl", () => { + it("appends the device code as a query parameter", () => { + expect(buildVerificationUrl("https://app.selftune.dev/auth/device", "ABCD-1234")).toBe( + "https://app.selftune.dev/auth/device?code=ABCD-1234", + ); + }); + + it("preserves existing query parameters", () => { + expect(buildVerificationUrl("https://app.selftune.dev/auth/device?foo=bar", "ABCD-1234")).toBe( + "https://app.selftune.dev/auth/device?foo=bar&code=ABCD-1234", + ); + }); +}); + // --------------------------------------------------------------------------- // requestDeviceCode // --------------------------------------------------------------------------- diff --git a/tests/sandbox/docker/entrypoint.sh b/tests/sandbox/docker/entrypoint.sh index 333636b3..bb8bc2e4 100755 --- a/tests/sandbox/docker/entrypoint.sh +++ b/tests/sandbox/docker/entrypoint.sh @@ -9,8 +9,13 @@ set -euo pipefail # Ensure sandbox HOME is owned by node (handles stale Docker volumes) sudo chown -R node:node "${HOME}" -# Provision fixtures into the sandbox HOME (idempotent) -bash /app/tests/sandbox/provision-claude.sh "${HOME}" /app +# Provision fixtures into the sandbox HOME (idempotent) unless explicitly skipped +if [ "${SKIP_PROVISION:-0}" != "1" ]; then + bash /app/tests/sandbox/provision-claude.sh "${HOME}" /app +else + mkdir -p "${HOME}/.claude" "${HOME}/.selftune" + echo "Skipping sandbox fixture provisioning (SKIP_PROVISION=1)." 
+fi # Run the provided command (default: run-with-llm.ts) exec "$@" diff --git a/tests/sandbox/fixtures/claude-settings.json b/tests/sandbox/fixtures/claude-settings.json index e0fdccee..2d0480ec 100644 --- a/tests/sandbox/fixtures/claude-settings.json +++ b/tests/sandbox/fixtures/claude-settings.json @@ -1,7 +1,60 @@ { "hooks": { - "prompt-submit": [{ "command": "selftune prompt-log" }], - "post-tool-use": [{ "command": "selftune skill-eval" }], - "session-stop": [{ "command": "selftune session-stop" }] + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "bun run /app/cli/selftune/hooks/prompt-log.ts", + "timeout": 5 + }, + { + "type": "command", + "command": "bun run /app/cli/selftune/hooks/auto-activate.ts", + "timeout": 5 + } + ] + } + ], + "PreToolUse": [ + { + "matcher": "Write|Edit", + "hooks": [ + { + "type": "command", + "command": "bun run /app/cli/selftune/hooks/skill-change-guard.ts", + "timeout": 5 + }, + { + "type": "command", + "command": "bun run /app/cli/selftune/hooks/evolution-guard.ts", + "timeout": 5 + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "Read|Skill", + "hooks": [ + { + "type": "command", + "command": "bun run /app/cli/selftune/hooks/skill-eval.ts", + "timeout": 5 + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "bun run /app/cli/selftune/hooks/session-stop.ts", + "timeout": 15 + } + ] + } + ] } } diff --git a/tests/sandbox/run-install-sandbox.ts b/tests/sandbox/run-install-sandbox.ts new file mode 100644 index 00000000..a76cdae3 --- /dev/null +++ b/tests/sandbox/run-install-sandbox.ts @@ -0,0 +1,231 @@ +#!/usr/bin/env bun +/** + * Empty-state sandbox install test for selftune. + * + * Verifies the real setup path from a blank HOME: + * 1. `selftune init` writes config + * 2. Claude settings hooks are installed + * 3. `selftune doctor` reports hook_settings as pass + * 4. 
re-running init without --force is idempotent + */ + +import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { dirname, join, resolve } from "node:path"; + +interface TestResult { + name: string; + command: string; + exitCode: number; + passed: boolean; + durationMs: number; + stdout: string; + stderr: string; + error?: string; +} + +const PROJECT_ROOT = resolve(import.meta.dir, "..", ".."); +const CLI_PATH = join(PROJECT_ROOT, "cli", "selftune", "index.ts"); +const SANDBOX_ROOT = mkdtempSync(join(tmpdir(), "selftune-install-sandbox-")); +const SANDBOX_HOME = join(SANDBOX_ROOT, "home"); +const CONFIG_PATH = join(SANDBOX_HOME, ".selftune", "config.json"); +const SETTINGS_PATH = join(SANDBOX_HOME, ".claude", "settings.json"); + +mkdirSync(join(SANDBOX_HOME, ".claude"), { recursive: true }); + +const sandboxEnv = { + ...process.env, + HOME: SANDBOX_HOME, + SELFTUNE_HOME: SANDBOX_HOME, + NO_COLOR: "1", +}; + +async function runCliCommand(name: string, args: string[]): Promise { + const command = `bun run ${CLI_PATH} ${args.join(" ")}`; + const start = performance.now(); + + try { + const proc = Bun.spawn(["bun", "run", CLI_PATH, ...args], { + env: sandboxEnv, + stdout: "pipe", + stderr: "pipe", + cwd: PROJECT_ROOT, + }); + + const [stdout, stderr] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + ]); + const exitCode = await proc.exited; + + return { + name, + command, + exitCode, + passed: exitCode === 0, + durationMs: Math.round(performance.now() - start), + stdout, + stderr, + }; + } catch (error) { + return { + name, + command, + exitCode: 1, + passed: false, + durationMs: Math.round(performance.now() - start), + stdout: "", + stderr: "", + error: error instanceof Error ? 
error.message : String(error), + }; + } +} + +function readJson(path: string): unknown { + return JSON.parse(readFileSync(path, "utf-8")); +} + +function formatRow(columns: string[], widths: number[]): string { + return `| ${columns.map((column, i) => column.padEnd(widths[i])).join(" | ")} |`; +} + +function printSummary(results: TestResult[]): void { + const nameWidth = Math.max(...results.map((r) => r.name.length), "Test".length); + const statusWidth = Math.max(...results.map((r) => (r.passed ? 4 : 4)), "Status".length); + const durationWidth = Math.max(...results.map((r) => `${r.durationMs}ms`.length), "Duration".length); + const widths = [nameWidth, statusWidth, durationWidth]; + const separator = `+${widths.map((w) => "-".repeat(w + 2)).join("+")}+`; + + console.log(`\n${separator}`); + console.log(formatRow(["Test", "Status", "Duration"], widths)); + console.log(separator); + for (const result of results) { + console.log( + formatRow([result.name, result.passed ? "PASS" : "FAIL", `${result.durationMs}ms`], widths), + ); + } + console.log(`${separator}\n`); + + const passed = results.filter((r) => r.passed).length; + console.log(`Results: ${passed}/${results.length} passed`); +} + +async function main(): Promise { + console.log("\nSelftune Empty-State Install Sandbox"); + console.log("===================================="); + console.log(`Sandbox: ${SANDBOX_ROOT}`); + console.log(`Project: ${PROJECT_ROOT}\n`); + + const results: TestResult[] = []; + + try { + const initResult = await runCliCommand("init", [ + "init", + "--agent", + "claude_code", + "--cli-path", + CLI_PATH, + "--force", + ]); + if (initResult.passed) { + if (!existsSync(CONFIG_PATH)) { + initResult.passed = false; + initResult.error = `Expected config at ${CONFIG_PATH}`; + } else if (!existsSync(SETTINGS_PATH)) { + initResult.passed = false; + initResult.error = `Expected Claude settings at ${SETTINGS_PATH}`; + } else { + const config = readJson(CONFIG_PATH) as Record; + if (config.agent_type 
!== "claude_code" || config.hooks_installed !== true) { + initResult.passed = false; + initResult.error = "Expected claude_code config with hooks_installed=true"; + } + } + } + results.push(initResult); + + const hookInstallResult: TestResult = { + name: "installed hooks", + command: SETTINGS_PATH, + exitCode: 0, + passed: true, + durationMs: 0, + stdout: "", + stderr: "", + }; + if (existsSync(SETTINGS_PATH)) { + const settings = readJson(SETTINGS_PATH) as { + hooks?: Record>>; + }; + const hooks = settings.hooks ?? {}; + const requiredKeys = ["UserPromptSubmit", "PreToolUse", "PostToolUse", "Stop"]; + const missing = requiredKeys.filter((key) => !Array.isArray(hooks[key])); + const promptCommand = + hooks.UserPromptSubmit?.[0]?.hooks && + Array.isArray(hooks.UserPromptSubmit[0].hooks) && + typeof hooks.UserPromptSubmit[0].hooks[0]?.command === "string" + ? (hooks.UserPromptSubmit[0].hooks[0].command as string) + : ""; + if (missing.length > 0) { + hookInstallResult.passed = false; + hookInstallResult.error = `Missing hook keys: ${missing.join(", ")}`; + } else if (!promptCommand.includes(`${dirname(CLI_PATH)}/hooks/prompt-log.ts`)) { + hookInstallResult.passed = false; + hookInstallResult.error = "Prompt hook command did not resolve to the workspace hook path"; + } + } else { + hookInstallResult.passed = false; + hookInstallResult.error = `Missing settings file at ${SETTINGS_PATH}`; + } + results.push(hookInstallResult); + + const doctorResult = await runCliCommand("doctor", ["doctor"]); + if (doctorResult.passed) { + try { + const parsed = JSON.parse(doctorResult.stdout) as { + checks?: Array<{ name?: string; status?: string }>; + }; + const hookCheck = parsed.checks?.find((check) => check.name === "hook_settings"); + if (hookCheck?.status !== "pass") { + doctorResult.passed = false; + doctorResult.error = `Expected hook_settings=pass, got ${hookCheck?.status ?? 
"missing"}`; + } + } catch (error) { + doctorResult.passed = false; + doctorResult.error = + error instanceof Error ? `Failed to parse doctor JSON: ${error.message}` : String(error); + } + } + results.push(doctorResult); + + const idempotentResult = await runCliCommand("init (idempotent)", [ + "init", + "--agent", + "claude_code", + "--cli-path", + CLI_PATH, + ]); + if (idempotentResult.passed && !idempotentResult.stderr.includes("Already initialized")) { + idempotentResult.passed = false; + idempotentResult.error = 'Expected stderr to include "Already initialized"'; + } + results.push(idempotentResult); + + printSummary(results); + + const failures = results.filter((result) => !result.passed); + if (failures.length > 0) { + console.error("\n--- Failures ---\n"); + for (const failure of failures) { + console.error(`[${failure.name}] exit=${failure.exitCode}`); + if (failure.error) console.error(` Error: ${failure.error}`); + if (failure.stderr.trim()) console.error(` Stderr: ${failure.stderr.trim().slice(0, 400)}`); + } + process.exit(1); + } + } finally { + rmSync(SANDBOX_ROOT, { recursive: true, force: true }); + } +} + +await main(); diff --git a/tests/sandbox/run-sandbox.ts b/tests/sandbox/run-sandbox.ts index 6cb93114..6ba8f24d 100644 --- a/tests/sandbox/run-sandbox.ts +++ b/tests/sandbox/run-sandbox.ts @@ -327,6 +327,20 @@ function countLines(filePath: string): number { return content.split("\n").length; } +function countCanonicalRecordsByKind(filePath: string, recordKind: string): number { + if (!existsSync(filePath)) return 0; + const content = readFileSync(filePath, "utf-8").trim(); + if (!content) return 0; + return content.split("\n").reduce((count, line) => { + try { + const record = JSON.parse(line) as Record; + return record.record_kind === recordKind ? 
count + 1 : count; + } catch { + return count; + } + }, 0); +} + function _fileHasNewContent(filePath: string, minLines: number): boolean { return countLines(filePath) >= minLines; } @@ -368,10 +382,11 @@ async function main(): Promise { } results.push(doctorResult); - // b. evals --skill find-skills + // b. eval generate --skill find-skills const evalsOutput = join(SANDBOX_HOME, "find-skills_eval.json"); - const evalsFsResult = await runCliCommand("evals (find-skills)", [ - "evals", + const evalsFsResult = await runCliCommand("eval generate (find-skills)", [ + "eval", + "generate", "--skill", "find-skills", "--output", @@ -379,10 +394,11 @@ async function main(): Promise { ]); results.push(evalsFsResult); - // c. evals --skill frontend-design + // c. eval generate --skill frontend-design const evalsFeOutput = join(SANDBOX_HOME, "frontend-design_eval.json"); - const evalsFeResult = await runCliCommand("evals (frontend-design)", [ - "evals", + const evalsFeResult = await runCliCommand("eval generate (frontend-design)", [ + "eval", + "generate", "--skill", "frontend-design", "--output", @@ -503,13 +519,17 @@ async function main(): Promise { ); const toolUsePayload = JSON.parse( readFileSync(join(FIXTURES_DIR, "hook-payloads", "post-tool-use.json"), "utf-8"), - ); + ) as Record; const sessionStopPayload = JSON.parse( readFileSync(join(FIXTURES_DIR, "hook-payloads", "session-stop.json"), "utf-8"), ); + const canonicalLogPath = join(SANDBOX_CLAUDE_DIR, "canonical_telemetry_log.jsonl"); + toolUsePayload.transcript_path = join(SANDBOX_PROJECTS_DIR, "session-001.jsonl"); + toolUsePayload.session_id = "session-001"; // a. prompt-log hook const queryLogPath = join(SANDBOX_CLAUDE_DIR, "all_queries_log.jsonl"); + const skillLogPath = join(SANDBOX_CLAUDE_DIR, "skill_usage_log.jsonl"); const queryLinesBefore = countLines(queryLogPath); const promptHookResult = await runHook( @@ -526,18 +546,17 @@ async function main(): Promise { results.push(promptHookResult); // b. 
skill-eval hook - const skillLogPath = join(SANDBOX_CLAUDE_DIR, "skill_usage_log.jsonl"); - const skillLinesBefore = countLines(skillLogPath); + const skillInvocationsBefore = countCanonicalRecordsByKind(canonicalLogPath, "skill_invocation"); const skillHookResult = await runHook( "hook: skill-eval", join(hooksDir, "skill-eval.ts"), toolUsePayload, ); - const skillLinesAfter = countLines(skillLogPath); - if (skillHookResult.passed && skillLinesAfter <= skillLinesBefore) { + const skillInvocationsAfter = countCanonicalRecordsByKind(canonicalLogPath, "skill_invocation"); + if (skillHookResult.passed && skillInvocationsAfter <= skillInvocationsBefore) { skillHookResult.passed = false; - skillHookResult.error = `Expected new record in skill_usage_log.jsonl (before: ${skillLinesBefore}, after: ${skillLinesAfter})`; + skillHookResult.error = `Expected new skill_invocation in canonical_telemetry_log.jsonl (before: ${skillInvocationsBefore}, after: ${skillInvocationsAfter})`; } results.push(skillHookResult); @@ -561,9 +580,10 @@ async function main(): Promise { // OpenClaw integration tests // ----------------------------------------------------------------------- - // a. ingest-openclaw — standard ingestion - const ingestResult = await runCliCommand("ingest-openclaw", [ - "ingest-openclaw", + // a. ingest openclaw — standard ingestion + const ingestResult = await runCliCommand("ingest openclaw", [ + "ingest", + "openclaw", "--agents-dir", SANDBOX_OPENCLAW_AGENTS, ]); @@ -626,11 +646,12 @@ async function main(): Promise { } results.push(ingestResult); - // b. ingest-openclaw --dry-run + // b. 
ingest openclaw --dry-run // First, count current lines in query log to verify dry-run doesn't add const queryLinesBeforeDry = countLines(queryLogPath); - const dryRunResult = await runCliCommand("ingest-openclaw --dry-run", [ - "ingest-openclaw", + const dryRunResult = await runCliCommand("ingest openclaw --dry-run", [ + "ingest", + "openclaw", "--agents-dir", SANDBOX_OPENCLAW_AGENTS, "--dry-run", @@ -644,9 +665,10 @@ async function main(): Promise { } results.push(dryRunResult); - // c. ingest-openclaw (idempotent) — second run should find 0 new sessions - const idempotentResult = await runCliCommand("ingest-openclaw (idempotent)", [ - "ingest-openclaw", + // c. ingest openclaw (idempotent) — second run should find 0 new sessions + const idempotentResult = await runCliCommand("ingest openclaw (idempotent)", [ + "ingest", + "openclaw", "--agents-dir", SANDBOX_OPENCLAW_AGENTS, ]); From cf66c62f413ab0c4db35505ccf520269f7a7d54c Mon Sep 17 00:00:00 2001 From: WellDunDun <45949032+WellDunDun@users.noreply.github.com> Date: Sun, 22 Mar 2026 12:48:46 +0300 Subject: [PATCH 02/10] refactor: migrate from Biome to oxc ecosystem (oxlint + oxfmt) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace Biome v2.4.7 with oxlint for linting and oxfmt for formatting. oxlint is 50-100x faster than ESLint with 699 built-in rules; oxfmt is 2x faster than Biome with 100% Prettier JS/TS conformance. - Add .oxlintrc.json (correctness errors, suspicious warnings, TS/unicorn/oxc/import plugins) - Add .oxfmtrc.json (migrated from biome config, 2-space indent, 100-char width, import sorting) - Fix 7 real code issues found by oxlint (unused imports, dead code, Array constructor, regex→startsWith) - Suppress 7 noisy/inapplicable rules (no-shadow, unicorn/no-array-sort, etc.) 
- Update package.json scripts, Makefile, and CI workflow to use oxlint + oxfmt - Delete biome.json and remove @biomejs/biome dependency - Update CONTRIBUTING.md and .coderabbit.yaml references Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/agents/diagnosis-analyst.md | 26 +- .claude/agents/evolution-reviewer.md | 20 +- .claude/agents/integration-guide.md | 49 +- .claude/agents/pattern-analyst.md | 32 +- .coderabbit.yaml | 8 +- .github/workflows/ci.yml | 3 +- .oxfmtrc.json | 25 + .oxlintrc.json | 26 + AGENTS.md | 78 +- ARCHITECTURE.md | 131 ++-- CONTRIBUTING.md | 37 +- Makefile | 18 +- PRD.md | 64 +- README.md | 70 +- ROADMAP.md | 16 +- SECURITY.md | 2 +- apps/local-dashboard/index.html | 22 +- apps/local-dashboard/package.json | 8 +- apps/local-dashboard/src/App.tsx | 76 +- .../src/components/app-sidebar.tsx | 201 ++--- .../src/components/runtime-footer.tsx | 29 +- .../src/components/site-header.tsx | 8 +- .../src/components/theme-provider.tsx | 58 +- .../src/components/theme-toggle.tsx | 11 +- .../src/components/ui/avatar.tsx | 56 +- .../src/components/ui/breadcrumb.tsx | 66 +- .../src/components/ui/chart.tsx | 185 ++--- .../src/components/ui/drawer.tsx | 55 +- .../src/components/ui/input.tsx | 12 +- .../src/components/ui/separator.tsx | 18 +- .../src/components/ui/sheet.tsx | 58 +- .../src/components/ui/sidebar.tsx | 292 ++++--- .../src/components/ui/skeleton.tsx | 9 +- .../src/components/ui/sonner.tsx | 43 +- .../src/components/ui/toggle-group.tsx | 40 +- .../src/components/ui/toggle.tsx | 16 +- apps/local-dashboard/src/hooks/useDoctor.ts | 1 + .../src/hooks/useOrchestrateRuns.ts | 1 + apps/local-dashboard/src/hooks/useOverview.ts | 1 + .../src/hooks/useSkillReport.ts | 1 + apps/local-dashboard/src/main.tsx | 2 + apps/local-dashboard/src/pages/Overview.tsx | 154 ++-- .../local-dashboard/src/pages/SkillReport.tsx | 715 ++++++++++-------- apps/local-dashboard/src/pages/Status.tsx | 93 ++- apps/local-dashboard/src/styles.css | 34 +- 
apps/local-dashboard/vite.config.ts | 1 + bin/selftune.cjs | 2 +- biome.json | 30 - bun.lock | 173 +++-- cli/selftune/activation-rules.ts | 1 + cli/selftune/alpha-upload/build-payloads.ts | 2 + cli/selftune/alpha-upload/stage-canonical.ts | 2 + cli/selftune/auto-update.ts | 1 + cli/selftune/badge/badge.ts | 1 + cli/selftune/canonical-export.ts | 1 + cli/selftune/contribute/bundle.ts | 1 + cli/selftune/contribute/contribute.ts | 1 + cli/selftune/dashboard-server.ts | 1 + cli/selftune/eval/hooks-to-evals.ts | 1 + cli/selftune/eval/import-skillsbench.ts | 1 + cli/selftune/eval/synthetic-evals.ts | 5 +- cli/selftune/eval/unit-test.ts | 1 + cli/selftune/evolution/deploy-proposal.ts | 1 + cli/selftune/evolution/evolve-body.ts | 1 - cli/selftune/evolution/evolve.ts | 1 - cli/selftune/export.ts | 1 + cli/selftune/hooks/auto-activate.ts | 1 + cli/selftune/hooks/evolution-guard.ts | 2 +- cli/selftune/hooks/prompt-log.ts | 1 + cli/selftune/hooks/session-stop.ts | 2 +- cli/selftune/hooks/skill-change-guard.ts | 1 + cli/selftune/hooks/skill-eval.ts | 2 +- cli/selftune/index.ts | 10 +- cli/selftune/ingestors/claude-replay.ts | 1 + cli/selftune/ingestors/codex-rollout.ts | 1 + cli/selftune/ingestors/codex-wrapper.ts | 1 + cli/selftune/ingestors/openclaw-ingest.ts | 1 + cli/selftune/ingestors/opencode-ingest.ts | 1 + cli/selftune/localdb/db.ts | 1 + cli/selftune/localdb/direct-write.ts | 2 + cli/selftune/localdb/materialize.ts | 2 + cli/selftune/localdb/queries.ts | 1 + cli/selftune/normalization.ts | 1 + cli/selftune/observability.ts | 1 + cli/selftune/repair/skill-usage.ts | 1 + cli/selftune/routes/orchestrate-runs.ts | 1 + cli/selftune/routes/overview.ts | 1 + cli/selftune/routes/skill-report.ts | 1 + cli/selftune/sync.ts | 1 + cli/selftune/utils/canonical-log.ts | 2 + cli/selftune/utils/jsonl.ts | 1 + cli/selftune/utils/skill-log.ts | 1 + cli/selftune/utils/transcript.ts | 1 + cli/selftune/utils/trigger-check.ts | 2 +- cli/selftune/workflows/skill-md-writer.ts | 10 +- 
cli/selftune/workflows/workflows.ts | 1 + .../design-docs/alpha-remote-data-contract.md | 82 +- docs/design-docs/composability-v2.md | 20 +- docs/design-docs/evolution-pipeline.md | 97 +-- docs/design-docs/index.md | 29 +- docs/design-docs/live-dashboard-sse.md | 35 +- docs/design-docs/monitoring-pipeline.md | 48 +- docs/design-docs/sandbox-architecture.md | 28 +- docs/design-docs/sandbox-claude-code.md | 79 +- docs/design-docs/sandbox-test-harness.md | 105 +-- docs/design-docs/sqlite-first-migration.md | 39 +- docs/design-docs/system-overview.md | 22 +- docs/design-docs/workflow-support.md | 35 +- docs/escalation-policy.md | 10 +- .../active/agent-first-alpha-onboarding.md | 25 + .../active/alpha-rollout-data-loop-plan.md | 6 +- .../agent-first-skill-restructure.md | 76 +- .../dashboard-data-integrity-recovery.md | 2 + .../completed/dashboard-signal-integration.md | 29 +- .../completed/output-quality-loop-prereqs.md | 12 + .../completed/telemetry-normalization.md | 74 +- .../advanced-skill-patterns-adoption.md | 38 +- .../deferred/multi-agent-sandbox.md | 90 +-- .../phase-d-marginal-case-review-spike.md | 36 +- .../reference/subagent-testing-checklist.md | 1 + .../reference/telemetry-field-map.md | 130 ++-- docs/exec-plans/scope-expansion-plan.md | 106 ++- docs/exec-plans/tech-debt-tracker.md | 40 +- docs/golden-principles.md | 14 +- docs/integration-guide.md | 74 +- docs/launch-playbook-tracker.md | 60 +- docs/operator-guide.md | 34 +- docs/product-specs/index.md | 10 +- package.json | 63 +- .../fixtures/golden.test.ts | 1 + packages/telemetry-contract/package.json | 2 +- packages/telemetry-contract/src/schemas.ts | 1 + .../tests/compatibility.test.ts | 1 + packages/ui/README.md | 69 +- packages/ui/package.json | 6 +- .../ui/src/components/ActivityTimeline.tsx | 91 ++- packages/ui/src/components/EvidenceViewer.tsx | 471 +++++++----- .../ui/src/components/EvolutionTimeline.tsx | 155 ++-- packages/ui/src/components/InfoTip.tsx | 7 +- 
.../src/components/OrchestrateRunsPanel.tsx | 113 +-- packages/ui/src/components/section-cards.tsx | 43 +- .../ui/src/components/skill-health-grid.tsx | 406 +++++----- packages/ui/src/lib/constants.tsx | 1 + packages/ui/src/primitives/badge.tsx | 27 +- packages/ui/src/primitives/button.tsx | 14 +- packages/ui/src/primitives/card.tsx | 41 +- packages/ui/src/primitives/checkbox.tsx | 15 +- packages/ui/src/primitives/collapsible.tsx | 10 +- packages/ui/src/primitives/dropdown-menu.tsx | 100 ++- packages/ui/src/primitives/label.tsx | 12 +- packages/ui/src/primitives/select.tsx | 65 +- packages/ui/src/primitives/table.tsx | 61 +- packages/ui/src/primitives/tabs.tsx | 35 +- packages/ui/src/primitives/tooltip.tsx | 32 +- skill/SKILL.md | 121 +-- skill/Workflows/AlphaUpload.md | 8 +- skill/Workflows/AutoActivation.md | 17 +- skill/Workflows/Badge.md | 38 +- skill/Workflows/Baseline.md | 70 +- skill/Workflows/Composability.md | 27 +- skill/Workflows/Contribute.md | 47 +- skill/Workflows/Cron.md | 45 +- skill/Workflows/Dashboard.md | 59 +- skill/Workflows/Doctor.md | 60 +- skill/Workflows/Evals.md | 95 +-- skill/Workflows/EvolutionMemory.md | 52 +- skill/Workflows/Evolve.md | 166 ++-- skill/Workflows/EvolveBody.md | 94 +-- skill/Workflows/Grade.md | 29 +- skill/Workflows/ImportSkillsBench.md | 15 +- skill/Workflows/Ingest.md | 57 +- skill/Workflows/Initialize.md | 110 ++- skill/Workflows/Orchestrate.md | 38 +- skill/Workflows/Replay.md | 19 +- skill/Workflows/Rollback.md | 19 +- skill/Workflows/Schedule.md | 12 +- skill/Workflows/Sync.md | 29 +- skill/Workflows/UnitTest.md | 45 +- skill/Workflows/Watch.md | 49 +- skill/agents/diagnosis-analyst.md | 11 + skill/agents/evolution-reviewer.md | 14 + skill/agents/integration-guide.md | 10 + skill/agents/pattern-analyst.md | 13 +- skill/references/grading-methodology.md | 47 +- skill/references/interactive-config.md | 14 +- skill/references/invocation-taxonomy.md | 42 +- skill/references/logs.md | 20 +- tests/agent-guidance.test.ts 
| 1 + tests/alpha-upload/build-payloads.test.ts | 1 + tests/alpha-upload/e2e.test.ts | 8 +- tests/alpha-upload/flush.test.ts | 3 +- tests/alpha-upload/integration.test.ts | 1 + tests/alpha-upload/queue.test.ts | 1 + tests/alpha-upload/staging.test.ts | 1 + tests/alpha-upload/status.test.ts | 1 + tests/autonomy-proof.test.ts | 1 + tests/badge/badge-svg.test.ts | 1 + tests/badge/badge.test.ts | 1 + tests/blog-proof/seo-audit-evolve.test.ts | 3 +- tests/blog-proof/trigger-sanity.ts | 1 + tests/canonical-export.test.ts | 1 + tests/contribute/bundle.test.ts | 1 + tests/contribute/contribute.test.ts | 1 + tests/contribute/sanitize.test.ts | 1 + tests/dashboard/badge-routes.test.ts | 1 + tests/dashboard/dashboard-server.test.ts | 1 + tests/eval/baseline.test.ts | 1 + tests/eval/composability-v2.test.ts | 1 + tests/eval/composability.test.ts | 1 + tests/eval/generate-unit-tests.test.ts | 1 + tests/eval/hooks-to-evals.test.ts | 1 + tests/eval/import-skillsbench.test.ts | 1 + tests/eval/synthetic-evals.test.ts | 1 + tests/eval/unit-test.test.ts | 1 + tests/evolution/audit.test.ts | 1 + tests/evolution/constitutional.test.ts | 1 + tests/evolution/deploy-proposal.test.ts | 1 + tests/evolution/evidence.test.ts | 1 + tests/evolution/evolve-body.test.ts | 1 + tests/evolution/evolve.test.ts | 3 +- tests/evolution/extract-patterns.test.ts | 1 + tests/evolution/pareto.test.ts | 1 + tests/evolution/propose-body.test.ts | 6 +- tests/evolution/propose-description.test.ts | 6 +- tests/evolution/propose-routing.test.ts | 16 +- tests/evolution/refine-body.test.ts | 16 +- tests/evolution/rollback.test.ts | 1 + tests/evolution/stopping-criteria.test.ts | 1 + tests/evolution/types.test.ts | 1 + tests/evolution/validate-body.test.ts | 1 + tests/evolution/validate-proposal.test.ts | 1 + tests/evolution/validate-routing.test.ts | 1 + tests/grading/grade-session-flow.test.ts | 1 + tests/grading/grade-session.test.ts | 1 - tests/grading/pre-gates.test.ts | 1 + tests/hooks/auto-activate.test.ts | 
1 + tests/hooks/evolution-guard.test.ts | 1 + tests/hooks/prompt-log.test.ts | 1 + tests/hooks/session-stop.test.ts | 1 + tests/hooks/signal-detection.test.ts | 1 + tests/hooks/skill-change-guard.test.ts | 1 + tests/hooks/skill-eval.test.ts | 1 + tests/ingestors/claude-replay.test.ts | 1 + tests/ingestors/codex-rollout.test.ts | 1 + tests/ingestors/codex-wrapper.test.ts | 1 + tests/ingestors/openclaw-ingest.test.ts | 1 + tests/ingestors/opencode-ingest.test.ts | 1 + tests/init/init-enhanced.test.ts | 1 + tests/init/init.test.ts | 1 + tests/last/last.test.ts | 1 + tests/lint-architecture.test.ts | 1 + tests/localdb/localdb.test.ts | 1 - tests/localdb/write.test.ts | 2 + tests/memory/writer.test.ts | 1 + tests/monitoring/integration.test.ts | 1 + tests/monitoring/watch.test.ts | 1 + tests/normalization/normalization.test.ts | 1 + tests/observability.test.ts | 1 + tests/orchestrate-overlap.test.ts | 1 + tests/orchestrate.test.ts | 1 + tests/repair/skill-usage.test.ts | 1 + tests/sandbox/docker/entrypoint.sh | 9 + tests/sandbox/docker/run-with-llm.ts | 3 +- tests/sandbox/run-install-sandbox.ts | 17 +- tests/sandbox/run-sandbox.ts | 29 +- tests/signal-orchestrate.test.ts | 8 - tests/status/status.test.ts | 1 + tests/telemetry-contract/validators.test.ts | 1 + tests/trust-floor/health.test.ts | 1 + tests/trust-floor/hermetic-store.test.ts | 1 + tests/trust-floor/rebuild-preflight.test.ts | 1 + tests/types/new-types.test.ts | 1 + tests/types/ui-contract-parity.test.ts | 1 - tests/utils/canonical-log.test.ts | 1 + tests/utils/frontmatter.test.ts | 1 + tests/utils/html.test.ts | 1 + tests/utils/jsonl.test.ts | 1 + tests/utils/query-filter.test.ts | 1 + tests/utils/schema-validator.test.ts | 1 + tests/utils/skill-discovery.test.ts | 1 + tests/utils/skill-log.test.ts | 1 + tests/utils/transcript.test.ts | 1 + tests/utils/trigger-check.test.ts | 1 + tests/workflows/discover.test.ts | 1 + tests/workflows/skill-md-writer.test.ts | 1 + tests/workflows/workflows.test.ts | 1 + 286 
files changed, 4502 insertions(+), 3686 deletions(-) create mode 100644 .oxfmtrc.json create mode 100644 .oxlintrc.json delete mode 100644 biome.json diff --git a/.claude/agents/diagnosis-analyst.md b/.claude/agents/diagnosis-analyst.md index 3a947127..681f2d8d 100644 --- a/.claude/agents/diagnosis-analyst.md +++ b/.claude/agents/diagnosis-analyst.md @@ -20,6 +20,7 @@ This agent is spawned by the main agent as a subagent when deeper analysis is needed — it is not called directly by the user. **Connected workflows:** + - **Doctor** — when `selftune doctor` reveals persistent issues with a specific skill, spawn this agent for root cause analysis - **Grade** — when grades are consistently low for a skill, spawn this agent to investigate why - **Status** — when `selftune status` shows CRITICAL or WARNING flags on a skill, spawn this agent for a deep dive @@ -31,6 +32,7 @@ or unexplained failures warrant spawning this agent. ## Context You need access to: + - `~/.claude/session_telemetry_log.jsonl` — session-level metrics - `~/.claude/skill_usage_log.jsonl` — skill trigger events - `~/.claude/all_queries_log.jsonl` — all user queries (triggered and missed) @@ -62,6 +64,7 @@ selftune eval generate --skill --stats ``` Review aggregate metrics: + - **Error rate** — high error rate suggests process failures, not trigger issues - **Tool call breakdown** — unusual patterns (e.g., excessive Bash retries) indicate thrashing - **Average turns** — abnormally high turn count suggests the agent is struggling @@ -73,6 +76,7 @@ selftune eval generate --skill --max 50 ``` Review the generated eval set. Count entries by invocation type: + - **Explicit missed** = description is fundamentally broken (critical) - **Implicit missed** = description too narrow (common, fixable via evolve) - **Contextual missed** = lacks domain vocabulary (fixable via evolve) @@ -84,6 +88,7 @@ Reference `skill/references/invocation-taxonomy.md` for the full taxonomy. 
Read the skill's `SKILL.md` and check recent grading results. For each failed expectation, look at: + - **Trigger tier** — did the skill fire at all? - **Process tier** — did the agent follow the right steps? - **Quality tier** — was the output actually good? @@ -94,6 +99,7 @@ Reference `skill/references/grading-methodology.md` for the 3-tier model. Read `~/.claude/evolution_audit_log.jsonl` for entries matching the skill. Look for: + - Recent evolutions that may have introduced regressions - Rollbacks that suggest instability - Plateau patterns (repeated evolutions with no improvement) @@ -101,6 +107,7 @@ Look for: ### Step 7: Inspect session transcripts For the worst-performing sessions, read the transcript JSONL files. Look for: + - SKILL.md not being read (trigger failure) - Steps executed out of order (process failure) - Repeated errors or thrashing (quality failure) @@ -112,13 +119,13 @@ Compile findings into a structured report. ## Commands -| Command | Purpose | -|---------|---------| -| `selftune status` | Overall health snapshot | -| `selftune last` | Most recent session details | -| `selftune eval generate --skill --stats` | Aggregate telemetry | +| Command | Purpose | +| ------------------------------------------------ | --------------------------------------- | +| `selftune status` | Overall health snapshot | +| `selftune last` | Most recent session details | +| `selftune eval generate --skill --stats` | Aggregate telemetry | | `selftune eval generate --skill --max 50` | Generate eval set for coverage analysis | -| `selftune doctor` | Check infrastructure health | +| `selftune doctor` | Check infrastructure health | ## Output @@ -128,29 +135,36 @@ Produce a structured diagnosis report: ## Diagnosis Report: ### Summary + [One-paragraph overview of the problem] ### Health Metrics + - Pass rate: X% - Sessions analyzed: N - Error rate: X% - Trigger coverage: explicit X% / implicit X% / contextual X% ### Root Cause + [Primary reason for underperformance, 
categorized as:] + - TRIGGER: Skill not firing when it should - PROCESS: Skill fires but agent follows wrong steps - QUALITY: Steps are correct but output is poor - INFRASTRUCTURE: Hooks, logs, or config issues ### Evidence + [Specific log entries, transcript lines, or metrics supporting the diagnosis] ### Recommendations + 1. [Highest priority fix] 2. [Secondary fix] 3. [Optional improvement] ### Suggested Commands + [Exact selftune commands to execute the recommended fixes] ``` diff --git a/.claude/agents/evolution-reviewer.md b/.claude/agents/evolution-reviewer.md index 37081bfa..95f12cfa 100644 --- a/.claude/agents/evolution-reviewer.md +++ b/.claude/agents/evolution-reviewer.md @@ -12,6 +12,7 @@ gate that checks for regressions, validates eval set coverage, compares old vs. new descriptions, and provides an approve/reject verdict with reasoning. **Activate when the user says:** + - "review evolution proposal" - "check before deploying evolution" - "is this evolution safe" @@ -24,16 +25,19 @@ This agent is spawned by the main agent as a subagent to provide a safety review before deploying an evolution. 
**Connected workflows:** + - **Evolve** — in the review-before-deploy step, spawn this agent to evaluate the proposal for regressions, scope creep, and eval set quality - **EvolveBody** — same role for full-body and routing-table evolutions **Mode behavior:** + - **Interactive mode** — spawn this agent before deploying an evolution to get a human-readable safety review with an approve/reject verdict - **Autonomous mode** — the orchestrator handles validation internally using regression thresholds and auto-rollback; this agent is for interactive safety reviews only ## Context You need access to: + - `~/.claude/evolution_audit_log.jsonl` — proposal entries with before/after data - The target skill's `SKILL.md` file (current version) - The skill's `SKILL.md.bak` file (pre-evolution backup, if it exists) @@ -77,6 +81,7 @@ prefix, read the skill's `SKILL.md.bak` file (created by the evolve workflow as a pre-evolution backup) to obtain the original description. Check for: + - **Preserved triggers** — all existing trigger phrases still present - **Added triggers** — new phrases covering missed queries - **Removed content** — anything removed that should not have been @@ -86,6 +91,7 @@ Check for: ### Step 4: Validate eval set quality Read the eval set used for validation. Check: + - **Size** — at least 20 entries for meaningful coverage - **Type balance** — mix of explicit, implicit, contextual, and negative - **Negative coverage** — enough negatives to catch overtriggering @@ -96,6 +102,7 @@ Reference `skill/references/invocation-taxonomy.md` for healthy distribution. 
### Step 5: Check regression metrics From the proposal output or audit log `validated` entry, verify: + - **Pass rate improved** — proposed rate > original rate - **No excessive regressions** — regression count < 5% of total evals - **Confidence above threshold** — proposal confidence >= 0.7 @@ -104,6 +111,7 @@ From the proposal output or audit log `validated` entry, verify: ### Step 6: Review evolution history Check for patterns that suggest instability: + - Multiple evolutions in a short time (churn) - Previous rollbacks for this skill (fragility) - Plateau pattern (evolution not producing meaningful gains) @@ -124,12 +132,12 @@ Issue an approve or reject decision with full reasoning. ## Commands -| Command | Purpose | -|---------|---------| -| `selftune evolve --skill --skill-path --dry-run` | Generate proposal without deploying | -| Read eval file from evolve output or audit log | Inspect the exact eval set used for validation | -| `selftune watch --skill --skill-path ` | Check current performance baseline | -| `selftune status` | Overall skill health context | +| Command | Purpose | +| -------------------------------------------------------------- | ---------------------------------------------- | +| `selftune evolve --skill --skill-path --dry-run` | Generate proposal without deploying | +| Read eval file from evolve output or audit log | Inspect the exact eval set used for validation | +| `selftune watch --skill --skill-path ` | Check current performance baseline | +| `selftune status` | Overall skill health context | ## Output diff --git a/.claude/agents/integration-guide.md b/.claude/agents/integration-guide.md index 434144aa..f3925e6c 100644 --- a/.claude/agents/integration-guide.md +++ b/.claude/agents/integration-guide.md @@ -12,6 +12,7 @@ project structure, generate appropriate configuration, install hooks, and verify the setup is working end-to-end. 
**Activate when the user says:** + - "set up selftune" - "integrate selftune" - "configure selftune for my project" @@ -25,6 +26,7 @@ This agent is the deep-dive version of the Initialize workflow, spawned by the main agent as a subagent when the project structure is complex. **Connected workflows:** + - **Initialize** — for complex project structures (monorepos, multi-skill repos, mixed agent platforms), spawn this agent instead of running the basic init workflow **When to spawn:** when the project has multiple SKILL.md files, multiple @@ -34,6 +36,7 @@ structure where the standard `selftune init` needs project-specific guidance. ## Context You need access to: + - The user's project root directory - `~/.selftune/config.json` (may not exist yet) - `~/.claude/settings.json` (for hook installation) @@ -48,18 +51,22 @@ You need access to: Examine the workspace to determine the project type: **Single-skill project:** + - One `SKILL.md` at or near the project root - Typical for focused tools and utilities **Multi-skill project:** + - Multiple `SKILL.md` files in separate directories - Skills are independent but coexist in one repo **Monorepo:** + - Multiple packages/projects with their own skill files - May have shared configuration at the root level **No skills yet:** + - No `SKILL.md` files found - User needs to create skills before selftune can observe them @@ -72,6 +79,7 @@ selftune doctor ``` If selftune is already installed, parse the doctor output: + - **All checks pass** — setup is complete, offer to run a health audit - **Some checks fail** — fix the failing checks (see Step 6) - **Command not found** — proceed to Step 3 @@ -102,6 +110,7 @@ Parse the output to confirm `~/.selftune/config.json` was created. Note the detected `agent_type` and `cli_path`. 
If the user is on a non-Claude agent platform: + - **Codex** — inform about `ingest wrap-codex` and `ingest codex` options - **OpenCode** — inform about `ingest opencode` option @@ -110,11 +119,11 @@ If the user is on a non-Claude agent platform: For **Claude Code** users, merge hook entries from `skill/settings_snippet.json` into `~/.claude/settings.json`. Three hooks are required: -| Hook | Script | Purpose | -|------|--------|---------| -| `UserPromptSubmit` | `hooks/prompt-log.ts` | Log every user query | -| `PostToolUse` (Read) | `hooks/skill-eval.ts` | Track skill triggers | -| `Stop` | `hooks/session-stop.ts` | Capture session telemetry | +| Hook | Script | Purpose | +| -------------------- | ----------------------- | ------------------------- | +| `UserPromptSubmit` | `hooks/prompt-log.ts` | Log every user query | +| `PostToolUse` (Read) | `hooks/skill-eval.ts` | Track skill triggers | +| `Stop` | `hooks/session-stop.ts` | Capture session telemetry | Derive script paths from `cli_path` in `~/.selftune/config.json`. @@ -129,13 +138,13 @@ selftune doctor All checks must pass. For any failures: -| Failed Check | Resolution | -|-------------|------------| -| Log files missing | Run a test session to generate initial entries | -| Logs not parseable | Inspect and fix corrupted log lines | -| Hooks not installed | Re-check settings.json merge from Step 5 | -| Hook scripts missing | Verify paths point to actual files on disk | -| Audit log invalid | Remove corrupted entries | +| Failed Check | Resolution | +| -------------------- | ---------------------------------------------- | +| Log files missing | Run a test session to generate initial entries | +| Logs not parseable | Inspect and fix corrupted log lines | +| Hooks not installed | Re-check settings.json merge from Step 5 | +| Hook scripts missing | Verify paths point to actual files on disk | +| Audit log invalid | Remove corrupted entries | Re-run doctor after each fix until all checks pass. 
@@ -176,12 +185,12 @@ Tell the user what to do next based on their goals: ## Commands -| Command | Purpose | -|---------|---------| -| `selftune init` | Bootstrap configuration | -| `selftune doctor` | Verify installation health | -| `selftune status` | Post-setup health check | -| `selftune last` | Verify telemetry capture | +| Command | Purpose | +| -------------------------------------- | -------------------------------- | +| `selftune init` | Bootstrap configuration | +| `selftune doctor` | Verify installation health | +| `selftune status` | Post-setup health check | +| `selftune last` | Verify telemetry capture | | `selftune eval generate --list-skills` | Confirm skills are being tracked | ## Output @@ -192,20 +201,24 @@ Produce a setup completion summary: ## selftune Setup Complete ### Environment + - Agent: - Project type: - Skills detected: ### Configuration + - Config: ~/.selftune/config.json [created / verified] - Hooks: [installed / N/A for non-Claude agents] - Doctor: [all checks pass / N failures — see below] ### Verification + - Telemetry capture: [working / not verified] - Skill tracking: [working / not verified] ### Next Steps + 1. [Primary recommended action] 2. [Secondary action] 3. [Optional action] diff --git a/.claude/agents/pattern-analyst.md b/.claude/agents/pattern-analyst.md index d0194a0c..8dacd839 100644 --- a/.claude/agents/pattern-analyst.md +++ b/.claude/agents/pattern-analyst.md @@ -12,6 +12,7 @@ where multiple skills compete for the same queries, find optimization opportunities, and identify systemic issues affecting multiple skills. **Activate when the user says:** + - "skill patterns" - "conflicts between skills" - "cross-skill analysis" @@ -25,6 +26,7 @@ This agent is spawned by the main agent as a subagent for deep cross-skill analysis. 
**Connected workflows:** + - **Composability** — when `selftune eval composability` identifies conflict candidates, spawn this agent for deeper investigation of trigger overlaps and resolution strategies - **Evals** — when analyzing cross-skill patterns or systemwide undertriggering, spawn this agent to find optimization opportunities @@ -35,6 +37,7 @@ conflicts (score > 0.3). ## Context You need access to: + - `~/.claude/skill_usage_log.jsonl` — which skills triggered for which queries - `~/.claude/all_queries_log.jsonl` — all queries including non-triggers - `~/.claude/session_telemetry_log.jsonl` — session-level metrics per skill @@ -65,6 +68,7 @@ skills that are healthy vs. those showing warnings or regressions. For each skill returned in Step 1, locate and read its `SKILL.md` file. Extract: + - The `description` field from frontmatter - Trigger keywords from the workflow routing table - Negative examples (if present) @@ -72,6 +76,7 @@ Extract: ### Step 4: Detect trigger conflicts Compare trigger keywords and description phrases across all skills. Flag: + - **Direct conflicts** — two skills list the same trigger keyword - **Semantic overlaps** — different words with the same meaning (e.g., "presentation" in skill A, "slide deck" in skill B) @@ -81,6 +86,7 @@ Compare trigger keywords and description phrases across all skills. Flag: ### Step 5: Analyze query routing patterns Read `skill_usage_log.jsonl` and group by query text. Look for: + - Queries that triggered multiple skills (conflict signal) - Queries that triggered no skills despite matching a description (gap signal) - Queries that triggered the wrong skill (misroute signal) @@ -94,6 +100,7 @@ selftune eval generate --skill --stats ``` Compare across skills: + - **Error rates** — are some skills consistently failing? 
- **Turn counts** — outlier skills may have process issues - **Tool call patterns** — skills with similar patterns may be duplicates @@ -101,6 +108,7 @@ Compare across skills: ### Step 7: Check evolution interactions Read `~/.claude/evolution_audit_log.jsonl` for all skills. Look for: + - Evolution in one skill that caused regression in another - Skills evolved in parallel that now conflict - Rollbacks that correlate with another skill's evolution @@ -111,12 +119,12 @@ Compile a cross-skill analysis report. ## Commands -| Command | Purpose | -|---------|---------| -| `selftune eval generate --list-skills` | Inventory all skills with query counts | -| `selftune status` | Health snapshot across all skills | -| `selftune eval generate --skill --stats` | Per-skill aggregate telemetry | -| `selftune eval generate --skill --max 50` | Generate eval set per skill | +| Command | Purpose | +| ------------------------------------------------ | -------------------------------------- | +| `selftune eval generate --list-skills` | Inventory all skills with query counts | +| `selftune status` | Health snapshot across all skills | +| `selftune eval generate --skill --stats` | Per-skill aggregate telemetry | +| `selftune eval generate --skill --max 50` | Generate eval set per skill | ## Output @@ -126,34 +134,42 @@ Produce a structured pattern analysis report: ## Cross-Skill Pattern Analysis ### Skill Inventory + | Skill | Sessions | Pass Rate | Status | -|-------|----------|-----------|--------| +| ----- | -------- | --------- | ------ | | ... | ... | ... | ... | ### Trigger Conflicts + [List of conflicting trigger pairs with affected queries] | Skill A | Skill B | Shared Triggers | Affected Queries | -|---------|---------|-----------------|------------------| +| ------- | ------- | --------------- | ---------------- | | ... | ... | ... | ... 
| ### Coverage Gaps + [Queries from all_queries_log that matched no skill] ### Misroutes + [Queries that triggered the wrong skill based on intent analysis] ### Systemic Issues + [Problems affecting multiple skills: shared infrastructure, common failure patterns, evolution interference] ### Optimization Recommendations + 1. [Highest impact change] 2. [Secondary optimization] 3. [Future consideration] ### Conflict Resolution Plan + [For each conflict, a specific resolution:] + - Skill A should own: [queries] - Skill B should own: [queries] - Add negative examples to: [skill] diff --git a/.coderabbit.yaml b/.coderabbit.yaml index 26bb9385..511dee7f 100644 --- a/.coderabbit.yaml +++ b/.coderabbit.yaml @@ -1,5 +1,5 @@ # yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json -# CodeRabbit config for selftune — TypeScript + Bun + Biome CLI tool +# CodeRabbit config for selftune — TypeScript + Bun + oxc CLI tool language: "en-US" tone_instructions: > @@ -47,7 +47,7 @@ reviews: - Proper process.exit() with correct codes (0=success, 1=error) - Actionable error messages, not raw stack traces - No hard-coded absolute paths; use path.resolve() or import.meta.dir - - Biome enforces formatting; do not comment on style + - oxfmt enforces formatting; do not comment on style - path: "tests/**/*.ts" instructions: | @@ -104,11 +104,11 @@ reviews: tools: biome: - enabled: true + enabled: false eslint: enabled: false oxc: - enabled: false + enabled: true github-checks: enabled: true ast-grep: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 80bbef13..6956a890 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,8 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2 - run: bun install - - run: bunx @biomejs/biome check . 
+ - run: bunx oxlint --format=github + - run: bunx oxfmt --check - run: bun run lint-architecture.ts build-dashboard: diff --git a/.oxfmtrc.json b/.oxfmtrc.json new file mode 100644 index 00000000..c21c659e --- /dev/null +++ b/.oxfmtrc.json @@ -0,0 +1,25 @@ +{ + "$schema": "./node_modules/oxfmt/configuration_schema.json", + "useTabs": false, + "tabWidth": 2, + "printWidth": 100, + "singleQuote": false, + "jsxSingleQuote": false, + "quoteProps": "as-needed", + "trailingComma": "all", + "semi": true, + "arrowParens": "always", + "bracketSameLine": false, + "bracketSpacing": true, + "sortImports": { + "enabled": true + }, + "ignorePatterns": [ + "**/.agent/skills", + "**/.claude/skills", + "**/.claude/worktrees", + "**/test-results", + "**/node_modules", + "**/bun.lock" + ] +} diff --git a/.oxlintrc.json b/.oxlintrc.json new file mode 100644 index 00000000..e02f9ed4 --- /dev/null +++ b/.oxlintrc.json @@ -0,0 +1,26 @@ +{ + "$schema": "./node_modules/oxlint/configuration_schema.json", + "categories": { + "correctness": "error", + "suspicious": "warn" + }, + "plugins": ["typescript", "unicorn", "oxc", "import"], + "rules": { + "no-shadow": "off", + "no-unmodified-loop-condition": "off", + "preserve-caught-error": "off", + "unicorn/no-array-sort": "off", + "unicorn/consistent-function-scoping": "off", + "unicorn/prefer-add-event-listener": "off", + "import/no-unassigned-import": "off" + }, + "ignorePatterns": [ + ".agent/skills", + ".claude/skills", + ".claude/worktrees", + "test-results", + "node_modules", + "bun.lock", + "apps/local-dashboard/dist" + ] +} diff --git a/AGENTS.md b/AGENTS.md index 28064fa5..466d1a1b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -11,6 +11,7 @@ selftune — Self-improving skills for AI agents. Watches real sessions, learns **selftune is a skill consumed by AI agents, not a CLI tool used by humans directly.** The user's interaction model is: + 1. Install the skill: `npx skills add selftune-dev/selftune` 2. 
Tell their agent: "set up selftune" / "improve my skills" / "how are my skills doing?" 3. The agent reads `skill/SKILL.md`, routes to the correct workflow, and runs CLI commands @@ -18,6 +19,7 @@ The user's interaction model is: The CLI (`cli/selftune/`) is the **agent's API**. The skill definition (`skill/SKILL.md`) is the **product surface**. Workflow docs (`skill/Workflows/`) are the **agent's instruction manual**. Users rarely if ever run `selftune` commands directly — their coding agent does it for them. **When developing selftune:** + - Changes to CLI behavior must be reflected in the corresponding `skill/Workflows/*.md` doc - New CLI commands need a workflow doc and a routing entry in `skill/SKILL.md` - Error messages should guide the agent, not the human (e.g., suggest the next CLI command, not "check the docs") @@ -152,50 +154,50 @@ See ARCHITECTURE.md for domain map, module layering, and dependency rules. ## Documentation Map -| Topic | Location | Status | -|-------|----------|--------| -| System Overview | docs/design-docs/system-overview.md | Current | -| Operator Guide | docs/operator-guide.md | Current | -| Architecture | ARCHITECTURE.md | Current | -| Product Requirements | PRD.md | Current | -| Skill Definition | skill/SKILL.md | Current | -| Design Docs | docs/design-docs/index.md | Current | -| Core Beliefs | docs/design-docs/core-beliefs.md | Current | -| Live Dashboard SSE | docs/design-docs/live-dashboard-sse.md | Current | -| SQLite-First Migration | docs/design-docs/sqlite-first-migration.md | Current | -| Product Specs | docs/product-specs/index.md | Current | -| Active Plans (~4 epics) | docs/exec-plans/active/ | Current | -| Completed Plans | docs/exec-plans/completed/ | Current | -| Deferred Plans | docs/exec-plans/deferred/ | Current | -| Technical Debt | docs/exec-plans/tech-debt-tracker.md | Current | -| Risk Policy | risk-policy.json | Current | -| Golden Principles | docs/golden-principles.md | Current | -| Escalation Policy | 
docs/escalation-policy.md | Current | -| References | skill/references/ | Current | -| Launch Playbook | docs/launch-playbook-tracker.md | Current | -| Security Policy | SECURITY.md | Current | -| Contributing Guide | CONTRIBUTING.md | Current | -| Code of Conduct | CODE_OF_CONDUCT.md | Current | -| License | LICENSE | Current | +| Topic | Location | Status | +| ----------------------- | ------------------------------------------ | ------- | +| System Overview | docs/design-docs/system-overview.md | Current | +| Operator Guide | docs/operator-guide.md | Current | +| Architecture | ARCHITECTURE.md | Current | +| Product Requirements | PRD.md | Current | +| Skill Definition | skill/SKILL.md | Current | +| Design Docs | docs/design-docs/index.md | Current | +| Core Beliefs | docs/design-docs/core-beliefs.md | Current | +| Live Dashboard SSE | docs/design-docs/live-dashboard-sse.md | Current | +| SQLite-First Migration | docs/design-docs/sqlite-first-migration.md | Current | +| Product Specs | docs/product-specs/index.md | Current | +| Active Plans (~4 epics) | docs/exec-plans/active/ | Current | +| Completed Plans | docs/exec-plans/completed/ | Current | +| Deferred Plans | docs/exec-plans/deferred/ | Current | +| Technical Debt | docs/exec-plans/tech-debt-tracker.md | Current | +| Risk Policy | risk-policy.json | Current | +| Golden Principles | docs/golden-principles.md | Current | +| Escalation Policy | docs/escalation-policy.md | Current | +| References | skill/references/ | Current | +| Launch Playbook | docs/launch-playbook-tracker.md | Current | +| Security Policy | SECURITY.md | Current | +| Contributing Guide | CONTRIBUTING.md | Current | +| Code of Conduct | CODE_OF_CONDUCT.md | Current | +| License | LICENSE | Current | ## Change Propagation Map When changing one part of selftune, check if dependent files need updating. This prevents stale docs and broken contracts. -| If you change... | Also update... 
| -|------------------|---------------| -| CLI commands in `index.ts` (add/rename/remove) | `skill/SKILL.md` Quick Reference + Workflow Routing table, `README.md` Commands table, `AGENTS.md` project tree | -| CLI flags on any command | The command's `skill/Workflows/*.md` doc (flags table + examples) | -| JSONL log schema or new log file | `constants.ts`, `types.ts`, `skill/references/logs.md`, `localdb/schema.ts` + `materialize.ts` + `direct-write.ts` + `queries.ts`, `ARCHITECTURE.md` data architecture | -| Dashboard contract (`dashboard-contract.ts`) | `apps/local-dashboard/src/types.ts`, dashboard components that consume the changed fields | -| Hook behavior (`hooks/*.ts`) | `skill/Workflows/Initialize.md` hook table, `skill/settings_snippet.json` | -| Orchestrate behavior | `skill/Workflows/Orchestrate.md`, `ARCHITECTURE.md` operating modes | -| Agent files (`skill/agents/*.md`) | `skill/SKILL.md` Specialized Agents table | -| New workflow file | `skill/SKILL.md` Workflow Routing table + Resource Index | -| Evolution pipeline changes | `skill/Workflows/Evolve.md`, `docs/design-docs/evolution-pipeline.md` | -| Platform adapter (ingestor) changes | `skill/Workflows/Ingest.md`, `README.md` Platforms section | -| Repo org/name change | `README.md` badges + install, `llms.txt`, `SECURITY.md`, `CONTRIBUTING.md`, `contribute.ts` repo constant, `package.json` (homepage/repo/bugs) | +| If you change... | Also update... 
| +| ---------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CLI commands in `index.ts` (add/rename/remove) | `skill/SKILL.md` Quick Reference + Workflow Routing table, `README.md` Commands table, `AGENTS.md` project tree | +| CLI flags on any command | The command's `skill/Workflows/*.md` doc (flags table + examples) | +| JSONL log schema or new log file | `constants.ts`, `types.ts`, `skill/references/logs.md`, `localdb/schema.ts` + `materialize.ts` + `direct-write.ts` + `queries.ts`, `ARCHITECTURE.md` data architecture | +| Dashboard contract (`dashboard-contract.ts`) | `apps/local-dashboard/src/types.ts`, dashboard components that consume the changed fields | +| Hook behavior (`hooks/*.ts`) | `skill/Workflows/Initialize.md` hook table, `skill/settings_snippet.json` | +| Orchestrate behavior | `skill/Workflows/Orchestrate.md`, `ARCHITECTURE.md` operating modes | +| Agent files (`skill/agents/*.md`) | `skill/SKILL.md` Specialized Agents table | +| New workflow file | `skill/SKILL.md` Workflow Routing table + Resource Index | +| Evolution pipeline changes | `skill/Workflows/Evolve.md`, `docs/design-docs/evolution-pipeline.md` | +| Platform adapter (ingestor) changes | `skill/Workflows/Ingest.md`, `README.md` Platforms section | +| Repo org/name change | `README.md` badges + install, `llms.txt`, `SECURITY.md`, `CONTRIBUTING.md`, `contribute.ts` repo constant, `package.json` (homepage/repo/bugs) | ## Development Workflow diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 3dea5e73..5f515c59 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -9,6 +9,7 @@ selftune is a local-first feedback loop for AI agent skills. It turns saved agen selftune is a **skill consumed by AI agents**, not a CLI tool for humans. 
The user installs the skill (`npx skills add selftune-dev/selftune`), then interacts through their coding agent ("set up selftune", "improve my skills"). The agent reads `skill/SKILL.md` to discover commands, routes to the correct workflow doc, and executes CLI commands on the user's behalf. This means: + - `skill/SKILL.md` is the primary product surface (agent reads this to know what to do) - `skill/Workflows/*.md` are the agent's step-by-step guides - `cli/selftune/` is the agent's API (the CLI binary the agent calls) @@ -69,25 +70,25 @@ flowchart LR ## Domain Map -| Domain | Directory / File | Responsibility | Quality Grade | -|--------|-------------------|----------------|---------------| -| Bootstrap | `cli/selftune/init.ts` | Agent detection, config bootstrap, setup guidance | B | -| Telemetry | `cli/selftune/hooks/` | Claude hook-based prompt, session, and skill-use hints | B | -| Ingestors | `cli/selftune/ingestors/` | Normalize Claude, Codex, OpenCode, and OpenClaw data into shared logs | B | -| Source Sync | `cli/selftune/sync.ts`, `cli/selftune/repair/` | Rebuild source-truth local evidence and repaired overlays | B | -| Scheduling | `cli/selftune/schedule.ts` | Generic cron/launchd/systemd artifact generation and install | B | -| Cron Adapter | `cli/selftune/cron/` | Optional OpenClaw cron integration | B | -| Eval | `cli/selftune/eval/` | False-negative detection, eval generation, baseline, unit tests, composability | B | -| Grading | `cli/selftune/grading/` | Three-tier session grading with deterministic pre-gates and agent-based evaluation | B | -| Evolution | `cli/selftune/evolution/` | Propose, validate, deploy, audit, and rollback skill changes | B | -| Orchestrator | `cli/selftune/orchestrate.ts` | Autonomy-first sync -> candidate selection -> evolve -> watch loop | B | -| Monitoring | `cli/selftune/monitoring/` | Post-deploy regression detection and rollback triggers | B | -| Local DB | `cli/selftune/localdb/` | SQLite materialization and 
payload-oriented queries | B | -| Dashboard | `cli/selftune/dashboard.ts`, `cli/selftune/dashboard-server.ts`, `apps/local-dashboard/` | Local SPA shell, v2 API with SSE live updates, overview/report/status UI | B | -| Observability CLI | `cli/selftune/status.ts`, `cli/selftune/last.ts`, `cli/selftune/badge/` | Fast local readouts of health, recent activity, and badge state | B | -| Alpha Upload | `cli/selftune/alpha-upload/`, `cli/selftune/alpha-identity.ts` | Alpha data pipeline: queue, V2 payload build, flush, HTTP transport with API key auth | B | -| Contribute | `cli/selftune/contribute/` | Opt-in anonymized export for community signal pooling | C | -| Skill | `skill/` | Agent-facing routing table, workflows, and references | B | +| Domain | Directory / File | Responsibility | Quality Grade | +| ----------------- | ---------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------- | +| Bootstrap | `cli/selftune/init.ts` | Agent detection, config bootstrap, setup guidance | B | +| Telemetry | `cli/selftune/hooks/` | Claude hook-based prompt, session, and skill-use hints | B | +| Ingestors | `cli/selftune/ingestors/` | Normalize Claude, Codex, OpenCode, and OpenClaw data into shared logs | B | +| Source Sync | `cli/selftune/sync.ts`, `cli/selftune/repair/` | Rebuild source-truth local evidence and repaired overlays | B | +| Scheduling | `cli/selftune/schedule.ts` | Generic cron/launchd/systemd artifact generation and install | B | +| Cron Adapter | `cli/selftune/cron/` | Optional OpenClaw cron integration | B | +| Eval | `cli/selftune/eval/` | False-negative detection, eval generation, baseline, unit tests, composability | B | +| Grading | `cli/selftune/grading/` | Three-tier session grading with deterministic pre-gates and agent-based evaluation | B | +| Evolution | `cli/selftune/evolution/` | Propose, validate, deploy, audit, and 
rollback skill changes | B | +| Orchestrator | `cli/selftune/orchestrate.ts` | Autonomy-first sync -> candidate selection -> evolve -> watch loop | B | +| Monitoring | `cli/selftune/monitoring/` | Post-deploy regression detection and rollback triggers | B | +| Local DB | `cli/selftune/localdb/` | SQLite materialization and payload-oriented queries | B | +| Dashboard | `cli/selftune/dashboard.ts`, `cli/selftune/dashboard-server.ts`, `apps/local-dashboard/` | Local SPA shell, v2 API with SSE live updates, overview/report/status UI | B | +| Observability CLI | `cli/selftune/status.ts`, `cli/selftune/last.ts`, `cli/selftune/badge/` | Fast local readouts of health, recent activity, and badge state | B | +| Alpha Upload | `cli/selftune/alpha-upload/`, `cli/selftune/alpha-identity.ts` | Alpha data pipeline: queue, V2 payload build, flush, HTTP transport with API key auth | B | +| Contribute | `cli/selftune/contribute/` | Opt-in anonymized export for community signal pooling | C | +| Skill | `skill/` | Agent-facing routing table, workflows, and references | B | ## Dependency Direction @@ -261,23 +262,23 @@ skill/ ## Module Definitions -| Module | Files | Responsibility | May Import From | -|--------|-------|----------------|-----------------| -| Shared | `types.ts`, `constants.ts`, `utils/*.ts` | Core shared types, paths, JSONL helpers, transcript parsing, agent-call helpers | Bun built-ins only | -| Bootstrap | `init.ts`, `observability.ts` | Config bootstrap and health checks | Shared | -| Hooks | `hooks/*.ts` | Claude-specific hints, activation rules, and enforcement guards | Shared | -| Ingestors | `ingestors/*.ts` | Normalize platform-specific session sources | Shared | -| Source Sync | `sync.ts`, `repair/*.ts` | Produce trustworthy local evidence before downstream decisions | Shared, Ingestors | -| Scheduling | `schedule.ts` | Build and optionally install generic scheduling artifacts | Shared | -| Cron Adapter | `cron/*.ts` | OpenClaw-specific scheduling 
setup/list/remove | Shared | -| Eval | `eval/*.ts` | Build eval sets, detect false negatives, baseline and composability analysis | Shared | -| Grading | `grading/*.ts` | Session grading and pre-gates | Shared, Eval | -| Evolution | `evolution/*.ts` | Description/body/routing proposal, validation, deploy, rollback, audit | Shared, Eval, Grading | -| Orchestrator | `orchestrate.ts` | Coordinate sync, candidate selection, evolve, and watch | Shared, Sync, Evolution, Monitoring, Status | -| Monitoring | `monitoring/*.ts` | Watch deployed changes and trigger rollback | Shared, Evolution | -| Local DB | `localdb/*.ts` | Materialize logs and audits into overview/report/query shapes | Shared, Sync outputs, Evolution audit | -| Dashboard | `dashboard.ts`, `dashboard-server.ts`, `apps/local-dashboard/` | Serve and render the local dashboard experience | Shared, LocalDB, Status, Observability, Evolution (evidence) | -| Skill | `skill/` | Provide agent-facing command routing and workflow guidance | Reads public CLI behavior and references | +| Module | Files | Responsibility | May Import From | +| ------------ | -------------------------------------------------------------- | ------------------------------------------------------------------------------- | ------------------------------------------------------------ | +| Shared | `types.ts`, `constants.ts`, `utils/*.ts` | Core shared types, paths, JSONL helpers, transcript parsing, agent-call helpers | Bun built-ins only | +| Bootstrap | `init.ts`, `observability.ts` | Config bootstrap and health checks | Shared | +| Hooks | `hooks/*.ts` | Claude-specific hints, activation rules, and enforcement guards | Shared | +| Ingestors | `ingestors/*.ts` | Normalize platform-specific session sources | Shared | +| Source Sync | `sync.ts`, `repair/*.ts` | Produce trustworthy local evidence before downstream decisions | Shared, Ingestors | +| Scheduling | `schedule.ts` | Build and optionally install generic scheduling artifacts | Shared | 
+| Cron Adapter | `cron/*.ts` | OpenClaw-specific scheduling setup/list/remove | Shared | +| Eval | `eval/*.ts` | Build eval sets, detect false negatives, baseline and composability analysis | Shared | +| Grading | `grading/*.ts` | Session grading and pre-gates | Shared, Eval | +| Evolution | `evolution/*.ts` | Description/body/routing proposal, validation, deploy, rollback, audit | Shared, Eval, Grading | +| Orchestrator | `orchestrate.ts` | Coordinate sync, candidate selection, evolve, and watch | Shared, Sync, Evolution, Monitoring, Status | +| Monitoring | `monitoring/*.ts` | Watch deployed changes and trigger rollback | Shared, Evolution | +| Local DB | `localdb/*.ts` | Materialize logs and audits into overview/report/query shapes | Shared, Sync outputs, Evolution audit | +| Dashboard | `dashboard.ts`, `dashboard-server.ts`, `apps/local-dashboard/` | Serve and render the local dashboard experience | Shared, LocalDB, Status, Observability, Evolution (evidence) | +| Skill | `skill/` | Provide agent-facing command routing and workflow guidance | Reads public CLI behavior and references | ## Truth Model: Hooks vs. Source Systems @@ -365,45 +366,45 @@ marked consumed so they don't affect subsequent runs. `selftune init` writes `~/.selftune/config.json`. 
-| Field | Type | Description | -|-------|------|-------------| -| `agent_type` | `claude_code \| codex \| opencode \| openclaw \| unknown` | Detected host agent | -| `cli_path` | `string` | Absolute path to the selftune CLI entry point | -| `llm_mode` | `agent \| api` | How grading/evolution run model calls | -| `agent_cli` | `string \| null` | Preferred agent binary | -| `hooks_installed` | `boolean` | Whether Claude hooks are configured | -| `initialized_at` | `string` | ISO timestamp of the last bootstrap | +| Field | Type | Description | +| ----------------- | --------------------------------------------------------- | --------------------------------------------- | +| `agent_type` | `claude_code \| codex \| opencode \| openclaw \| unknown` | Detected host agent | +| `cli_path` | `string` | Absolute path to the selftune CLI entry point | +| `llm_mode` | `agent \| api` | How grading/evolution run model calls | +| `agent_cli` | `string \| null` | Preferred agent binary | +| `hooks_installed` | `boolean` | Whether Claude hooks are configured | +| `initialized_at` | `string` | ISO timestamp of the last bootstrap | ## Shared Local Artifacts -| Artifact | Writer | Reader | -|----------|--------|--------| -| `~/.claude/session_telemetry_log.jsonl` | Hooks, ingestors, sync | Eval, grading, status, localdb | -| `~/.claude/skill_usage_log.jsonl` | Hooks | Eval, repair, status (deprecated — consolidated into `skill_invocations` table in SQLite) | -| `~/.claude/skill_usage_repaired.jsonl` | Sync / repair | Eval, status, localdb (deprecated — consolidated into `skill_invocations` table in SQLite) | -| `~/.claude/all_queries_log.jsonl` | Hooks, ingestors, sync | Eval, status, localdb | -| `~/.claude/evolution_audit_log.jsonl` | Evolution | Monitoring, status, localdb | -| `~/.claude/orchestrate_runs.jsonl` | Orchestrator | LocalDB, dashboard | -| `~/.claude/improvement_signals.jsonl` | Hooks (prompt-log) | session-stop hook, orchestrator | -| `~/.claude/.orchestrate.lock` | 
Orchestrator | session-stop hook (staleness check) | -| `~/.selftune/*.sqlite` | Hooks (direct-write), sync, materializer (backfill) | All reads: orchestrate, evolve, grade, status, dashboard | +| Artifact | Writer | Reader | +| --------------------------------------- | --------------------------------------------------- | ------------------------------------------------------------------------------------------ | +| `~/.claude/session_telemetry_log.jsonl` | Hooks, ingestors, sync | Eval, grading, status, localdb | +| `~/.claude/skill_usage_log.jsonl` | Hooks | Eval, repair, status (deprecated — consolidated into `skill_invocations` table in SQLite) | +| `~/.claude/skill_usage_repaired.jsonl` | Sync / repair | Eval, status, localdb (deprecated — consolidated into `skill_invocations` table in SQLite) | +| `~/.claude/all_queries_log.jsonl` | Hooks, ingestors, sync | Eval, status, localdb | +| `~/.claude/evolution_audit_log.jsonl` | Evolution | Monitoring, status, localdb | +| `~/.claude/orchestrate_runs.jsonl` | Orchestrator | LocalDB, dashboard | +| `~/.claude/improvement_signals.jsonl` | Hooks (prompt-log) | session-stop hook, orchestrator | +| `~/.claude/.orchestrate.lock` | Orchestrator | session-stop hook (staleness check) | +| `~/.selftune/*.sqlite` | Hooks (direct-write), sync, materializer (backfill) | All reads: orchestrate, evolve, grade, status, dashboard | ## The Evaluation Model -| Tier | What It Checks | Automated | -|------|----------------|-----------| -| Tier 1 — Trigger | Did the skill fire when it should have? | Yes | -| Tier 2 — Process | Did the session follow the expected workflow? | Yes | -| Tier 3 — Quality | Was the resulting work actually good enough? | Yes, via agent-as-grader | +| Tier | What It Checks | Automated | +| ---------------- | --------------------------------------------- | ------------------------ | +| Tier 1 — Trigger | Did the skill fire when it should have? 
| Yes | +| Tier 2 — Process | Did the session follow the expected workflow? | Yes | +| Tier 3 — Quality | Was the resulting work actually good enough? | Yes, via agent-as-grader | ## Invocation Taxonomy -| Type | Description | -|------|-------------| -| Explicit | The user names the skill directly | -| Implicit | The task matches the skill without naming it | +| Type | Description | +| ---------- | ------------------------------------------------- | +| Explicit | The user names the skill directly | +| Implicit | The task matches the skill without naming it | | Contextual | The task is implicit with real-world domain noise | -| Negative | Nearby queries that should not trigger the skill | +| Negative | Nearby queries that should not trigger the skill | ## Current Known Tensions diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 40a6eb1b..66ec76f3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,7 +26,7 @@ bun run cli/selftune/index.ts init ```bash make check # Runs lint + architecture lint + all tests -make lint # Biome check + architecture lint only +make lint # oxlint + oxfmt + architecture lint only make test # Tests only ``` @@ -51,6 +51,7 @@ This creates a temporary `HOME` directory in `/tmp`, copies test fixtures (3 ski Commands like `grade` and `evolve` need LLM calls. Test them in the devcontainer, based on the [official Claude Code devcontainer reference](https://code.claude.com/docs/en/devcontainer): **First-time setup** (one-time, auth persists in a Docker volume): + ```bash make sandbox-shell # drop into the container claude login # paste your token @@ -58,6 +59,7 @@ exit ``` **Run LLM tests:** + ```bash make sandbox-llm ``` @@ -82,10 +84,11 @@ Follow the conventions in [docs/golden-principles.md](docs/golden-principles.md) ## Code Style -[Biome](https://biomejs.dev) handles formatting and linting. Run before submitting: +[oxc](https://oxc.rs) handles linting (oxlint) and formatting (oxfmt). 
Run before submitting: ```bash bun run lint:fix +bun run format ``` ## Pull Request Expectations @@ -131,25 +134,25 @@ While linked, hooks in `~/.claude/settings.json` point through the symlink to yo When modifying JSONL log schemas or adding new fields, update all of these to keep the pipeline consistent: -| File | What to update | -|------|---------------| -| `cli/selftune/types.ts` | Add/modify the TypeScript interface | -| `cli/selftune/constants.ts` | Add log path constant if new file | -| `cli/selftune/localdb/schema.ts` | Add column to SQLite schema | -| `cli/selftune/localdb/materialize.ts` | Map JSONL field → SQLite column | -| `cli/selftune/normalization.ts` | Update canonical derivation if applicable | -| `cli/selftune/dashboard-contract.ts` | Expose field to dashboard API | -| `apps/local-dashboard/src/` | Consume field in UI components | -| `skill/references/logs.md` | Document the field for agents | +| File | What to update | +| ------------------------------------- | ----------------------------------------- | +| `cli/selftune/types.ts` | Add/modify the TypeScript interface | +| `cli/selftune/constants.ts` | Add log path constant if new file | +| `cli/selftune/localdb/schema.ts` | Add column to SQLite schema | +| `cli/selftune/localdb/materialize.ts` | Map JSONL field → SQLite column | +| `cli/selftune/normalization.ts` | Update canonical derivation if applicable | +| `cli/selftune/dashboard-contract.ts` | Expose field to dashboard API | +| `apps/local-dashboard/src/` | Consume field in UI components | +| `skill/references/logs.md` | Document the field for agents | ### Common Data Issues -| Symptom | Fix | -|---------|-----| -| Dashboard shows stale data | `selftune sync --force` | +| Symptom | Fix | +| ---------------------------------------- | ----------------------------------------------------------------------------------- | +| Dashboard shows stale data | `selftune sync --force` | | SQLite schema mismatch after code change | `selftune export` 
first, then `rm ~/.selftune/selftune.db && selftune sync --force` | -| Missing invocations after hook changes | Verify `~/.claude/settings.json` matchers, then `selftune doctor` | -| Need to backfill from transcripts | `selftune ingest claude --force` | +| Missing invocations after hook changes | Verify `~/.claude/settings.json` matchers, then `selftune doctor` | +| Need to backfill from transcripts | `selftune ingest claude --force` | ## Questions? diff --git a/Makefile b/Makefile index 71bd3359..6f40b22a 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,15 @@ -.PHONY: all clean lint test test-fast test-slow check typecheck-dashboard sandbox sandbox-install sandbox-llm sandbox-shell sandbox-shell-empty sandbox-reset sandbox-openclaw sandbox-openclaw-keep sandbox-openclaw-clean clean-branches +.PHONY: all clean lint test test-fast test-slow check typecheck-dashboard sandbox sandbox-install sandbox-llm sandbox-shell sandbox-shell-empty sandbox-shell-empty-workspace sandbox-reset sandbox-reset-state sandbox-openclaw sandbox-openclaw-keep sandbox-openclaw-clean clean-branches + +SANDBOX_CLI_VERSION := $(subst .,-,$(shell node -p "require('./package.json').version")) +SANDBOX_DATE_STAMP := $(shell date +%Y-%m-%d-%H%M%S) all: check clean: sandbox-openclaw-clean lint: - bunx biome check . 
+ bunx oxlint + bunx oxfmt --check bun run lint-architecture.ts test: @@ -31,15 +35,21 @@ sandbox-llm: docker compose -f tests/sandbox/docker/docker-compose.yml up --build sandbox-shell: - docker compose -f tests/sandbox/docker/docker-compose.yml run --build selftune-sandbox bash + docker compose -f tests/sandbox/docker/docker-compose.yml run --build --name selftune-sandbox-v$(SANDBOX_CLI_VERSION)-$(SANDBOX_DATE_STAMP)-shell selftune-sandbox bash sandbox-shell-empty: - docker compose -f tests/sandbox/docker/docker-compose.yml run --build -e SKIP_PROVISION=1 selftune-sandbox bash + docker compose -f tests/sandbox/docker/docker-compose.yml run --build --name selftune-sandbox-v$(SANDBOX_CLI_VERSION)-$(SANDBOX_DATE_STAMP)-empty -e SKIP_PROVISION=1 selftune-sandbox bash + +sandbox-shell-empty-workspace: + docker compose -f tests/sandbox/docker/docker-compose.yml run --build --name selftune-sandbox-v$(SANDBOX_CLI_VERSION)-$(SANDBOX_DATE_STAMP)-workspace -e SKIP_PROVISION=1 selftune-sandbox bash /app/tests/sandbox/docker/prepare-workspace-selftune.sh sandbox-reset: -docker ps -aq --filter label=com.docker.compose.project=docker --filter label=com.docker.compose.service=selftune-sandbox | xargs docker rm -f docker compose -f tests/sandbox/docker/docker-compose.yml down -v +sandbox-reset-state: + docker compose -f tests/sandbox/docker/docker-compose.yml run --name selftune-sandbox-v$(SANDBOX_CLI_VERSION)-$(SANDBOX_DATE_STAMP)-reset-state -e SKIP_PROVISION=1 selftune-sandbox bash /app/tests/sandbox/docker/reset-sandbox-state.sh + sandbox-openclaw: docker compose -f tests/sandbox/docker/docker-compose.openclaw.yml up --build diff --git a/PRD.md b/PRD.md index b2ff1900..bfdad2d1 100644 --- a/PRD.md +++ b/PRD.md @@ -110,12 +110,12 @@ Most eval tools stop at tier 1 or, at best, synthetic tier 2. 
selftune runs all selftune classifies every trigger query into one of four types, drawn from eval best practices: -| Type | Description | Example | -|---|---|---| -| **Explicit** | Names the skill directly | "use the pptx skill to make slides" | -| **Implicit** | Describes the task without naming the skill | "make me a slide deck" | -| **Contextual** | Implicit with realistic domain noise | "I need slides for the Q3 board meeting next Tuesday" | -| **Negative** | Adjacent queries that should NOT trigger | "what format should I use for a presentation?" | +| Type | Description | Example | +| -------------- | ------------------------------------------- | ----------------------------------------------------- | +| **Explicit** | Names the skill directly | "use the pptx skill to make slides" | +| **Implicit** | Describes the task without naming the skill | "make me a slide deck" | +| **Contextual** | Implicit with realistic domain noise | "I need slides for the Q3 board meeting next Tuesday" | +| **Negative** | Adjacent queries that should NOT trigger | "what format should I use for a presentation?" | A healthy skill catches all three positive types. A skill that only catches explicit invocations is forcing users to babysit it. selftune surfaces this breakdown so skill authors know exactly what kind of improvement is needed. @@ -140,15 +140,19 @@ All adapters write to the same shared log schema. Everything downstream — eval ## Key Features ### Session Telemetry + Captures per-session process metrics across all three platforms: tool call counts by type, bash commands executed, skills triggered, error count, assistant turns, token usage. Written to `~/.claude/session_telemetry_log.jsonl`. ### False Negative Detection + Compares the universe of logged queries against actual skill trigger events. Surfaces the queries where a skill should have fired but didn't. These are the invisible failures that accumulate into user frustration. 
### Eval Set Generation + Converts repaired/source-truth usage logs into trigger eval sets: positives (real queries that triggered), negatives (real queries that didn't), annotated with invocation type. Feeds directly into existing skill-creator eval infrastructure. ### Session Grading + Grades completed sessions against expectations using the agent the user already has installed — Claude Code, Codex, or OpenCode — without requiring a separate Anthropic API key. Produces `grading.json` compatible with the skill-creator eval viewer. Includes deterministic pre-gates that resolve expectations without LLM calls (<20ms), and graduated 0-1 scoring for finer-grained confidence tracking. Rich failure feedback provides structured explanations (`query`, `failure_reason`, `improvement_hint`, `invocation_type`) that feed directly into the evolution pipeline. ### Skill Evolution @@ -156,24 +160,31 @@ Grades completed sessions against expectations using the agent the user already Runs the description improvement loop using real usage signal as ground truth. Proposes new descriptions, validates against the eval set, confirms the pass rate improves, and writes the result to disk with a full audit trail. Supports Pareto multi-candidate evolution: generates N candidates in parallel, computes a Pareto frontier across invocation type dimensions (explicit, implicit, contextual, negative), and optionally merges complementary proposals. CLI flags: `--pareto` (default true), `--candidates N` (default 3, max 5). ### Grader Skill + A `skill-eval-grader` skill that makes the grader a first-class agent capability. Users can say "grade my last pptx session" and the agent reads telemetry, parses the transcript, grades inline, and writes `grading.json` — using their existing subscription, no extra setup. ### Process Stats + Aggregate telemetry across all sessions for a skill: average turns, tool call breakdown, error rates, bash command patterns. 
Useful for catching efficiency regressions and diagnosing thrashing. ### Skill Health Summary (`selftune status`) + Concise CLI overview of all skill health at a glance. Shows per-skill pass rates, trend direction (up/down/stable), missed query counts, status badges (HEALTHY/REGRESSED/NO DATA), unmatched queries total, pending evolution proposals, and system health from `doctor`. Runs in <500ms with zero LLM calls. Reuses `computeMonitoringSnapshot` from the monitoring pipeline. ### Last Session Insight (`selftune last`) + Quick post-session diagnostic showing the most recent session's triggered skills, unmatched queries, error count, tool call count, and a contextual recommendation. Designed for rapid feedback after a session ends. Zero LLM calls. ### Skill Health Dashboard (`selftune dashboard`) + Local React SPA served by `dashboard-server.ts`, backed by SQLite materialization and payload-oriented v2 API routes. Primary view is an overview page showing skill health, trends, unmatched queries, recent orchestrate activity, and pending proposals. Drill-down routes provide per-skill reports with pass-rate history, missed queries, evidence, and evolution context. ### Retroactive Replay (`selftune ingest claude`) + Batch ingestor for existing Claude Code session transcripts. Scans `~/.claude/projects//.jsonl`, extracts user queries and session metrics, and populates the shared JSONL logs. Idempotent via marker file — safe to run repeatedly. Supports `--since` date filtering, `--dry-run` preview, `--force` re-ingestion, and `--verbose` output. Bootstraps the eval corpus from existing sessions without waiting for hooks to accumulate data. ### Community Contribution (`selftune contribute`) + Opt-in export of anonymized skill observability data for community signal pooling. Assembles a `ContributionBundle` containing sanitized positive queries, eval entries with invocation taxonomy, grading summaries, evolution summaries, and session metrics. 
Two sanitization levels: conservative (paths, emails, secrets, IPs) and aggressive (adds identifiers, quoted strings, module names, 200-char truncation). Supports `--preview` to inspect before exporting, and `--submit` to create a GitHub issue with the bundle. --- @@ -191,15 +202,18 @@ Opt-in export of anonymized skill observability data for community signal poolin ## Success Metrics **Adoption** + - Time to first false-negative detection: target < 10 minutes from install - Time to first trustworthy local sync: target < 10 minutes from install **Effectiveness** + - Trigger pass rate improvement after one evolution loop: target > 15 percentage points - False negative detection rate: surface at least one missed trigger per 20 sessions for any undertriggering skill - Autonomous low-risk deploys maintain or improve watch metrics with automatic rollback when they regress **Retention** + - Skills with selftune installed show measurably lower explicit-invocation rates over 30 days - Users run the orchestrated loop at least once per skill per month @@ -209,12 +223,12 @@ Opt-in export of anonymized skill observability data for community signal poolin reins and selftune are complementary tools at different points in the agent development lifecycle: -| | reins | selftune | -|---|---|---| -| **When** | Repo setup, periodic audits | Continuously, every session | -| **What** | Scaffold, score, evolve repo structure | Observe, grade, evolve skill descriptions | +| | reins | selftune | +| ---------- | ------------------------------------------ | -------------------------------------------------- | +| **When** | Repo setup, periodic audits | Continuously, every session | +| **What** | Scaffold, score, evolve repo structure | Observe, grade, evolve skill descriptions | | **Output** | AGENTS.md, ARCHITECTURE.md, maturity score | Telemetry logs, grading reports, improved SKILL.md | -| **Signal** | Static analysis of repo structure | Live signal from real user sessions | +| **Signal** 
| Static analysis of repo structure | Live signal from real user sessions | Use reins to build the repo that makes agents effective. Use selftune to know whether the skills in that repo are actually working — and to make them better automatically. @@ -222,12 +236,12 @@ Use reins to build the repo that makes agents effective. Use selftune to know wh ## Release History -| npm Version | Date | Feature Milestones Included | -|-------------|------|-----------------------------| -| **0.1.0** | 2026-02-28 | M1 through M5 (observe, grade, evolve, watch, restructure) | -| **0.1.4** | 2026-03-01 | M6 and M7 (three-layer observability, replay + contribute) | -| **0.2.0** | 2026-03-05 | M8, M8.5 (sandbox harness, eval improvements, agents, guardrails, dashboard server) | -| **0.2.1** | 2026-03-10 | Source-truth sync hardening, SQLite-backed dashboard SPA, autonomy-first scheduling/orchestration | +| npm Version | Date | Feature Milestones Included | +| ----------- | ---------- | ------------------------------------------------------------------------------------------------- | +| **0.1.0** | 2026-02-28 | M1 through M5 (observe, grade, evolve, watch, restructure) | +| **0.1.4** | 2026-03-01 | M6 and M7 (three-layer observability, replay + contribute) | +| **0.2.0** | 2026-03-05 | M8, M8.5 (sandbox harness, eval improvements, agents, guardrails, dashboard server) | +| **0.2.1** | 2026-03-10 | Source-truth sync hardening, SQLite-backed dashboard SPA, autonomy-first scheduling/orchestration | --- @@ -236,6 +250,7 @@ Use reins to build the repo that makes agents effective. Use selftune to know wh > **Note:** These are feature phases used during development planning. They do not correspond to npm version numbers. See the Release History table above for the mapping. 
### M1 — Observe and detect + - Claude Code hooks (Stop, PostToolUse, UserPromptSubmit) - Codex adapter (wrapper + rollout ingestor) - OpenCode adapter (SQLite reader) @@ -245,23 +260,27 @@ Use reins to build the repo that makes agents effective. Use selftune to know wh - Process telemetry stats ### M2 — Grade + - Session grader via agent subprocess (no API key required) - `skill-eval-grader` skill - `grading.json` output compatible with skill-creator eval viewer - `grade-session.ts --use-agent` with auto-detection ### M3 — Evolve (Complete) + - Description improvement loop wired to real usage signal - Validation against eval set before deploy - PR generation with diff and eval summary - Confidence threshold and stopping criteria ### M4 — Watch (Complete) + - Post-deploy monitoring - Regression detection - Escalation when performance degrades after a deploy ### M5 — Agent-First Skill Restructure (Complete) + - `init` command: auto-detect agent environment, write persistent config to `~/.selftune/config.json` - Skill decomposed from 370-line monolith into Reins-style routing table (~120 lines) - 8 workflow files (1 per command) with step-by-step agent guides @@ -270,6 +289,7 @@ Use reins to build the repo that makes agents effective. Use selftune to know wh - Doctor command enhanced with config health check ### M6 — Three-Layer Observability (Complete) + - `selftune status`: CLI skill health summary with pass rates, trends, and system health - `selftune last`: Quick insight from the most recent session - Redesigned `selftune dashboard`: SQLite-backed SPA with overview and per-skill drill-down routes @@ -278,6 +298,7 @@ Use reins to build the repo that makes agents effective. 
Use selftune to know wh - Three observability surfaces replace activity-metric-only dashboard with actionable skill health data ### M7 — Retroactive Replay & Community Contribution (Complete) + - `selftune ingest claude`: batch ingest Claude Code transcripts from `~/.claude/projects/` - Idempotent marker file prevents duplicate ingestion - Extracts all user queries per session (not just last), populates all three JSONL logs @@ -302,17 +323,20 @@ Four high-value eval improvements implemented in parallel: **Problem:** selftune had 499 unit tests but zero end-to-end validation. CLI commands were never exercised against realistic data in an integrated way. LLM-dependent commands (grade, evolve) couldn't be tested without a live agent CLI. **Solution:** + - **Layer 1 (Local Sandbox):** `tests/sandbox/run-sandbox.ts` — Exercises all 7 read-only CLI commands + 3 hooks against fixture data in an isolated `/tmp` directory. 10 tests, ~400ms. - **Layer 2 (Devcontainer + Claude CLI):** `tests/sandbox/docker/` and `.devcontainer/` — Devcontainer setup and orchestrator for `grade`, `evolve`, and `watch` using `claude -p` (Agent SDK CLI) with `--dangerously-skip-permissions`. - **Firewall Isolation:** `.devcontainer/init-firewall.sh` — Sandbox firewall based on official Claude Code devcontainer reference. - **Fixtures:** 3 real skills from skills.sh (find-skills, frontend-design, ai-image-generation) with differentiated health profiles. **Key Design Decisions:** + - HOME env var redirection for complete isolation (all paths use `homedir()`) - Two-layer architecture: fast local tests (free) + Docker LLM tests (costs tokens) - Devcontainer-based isolation with firewall, no API key needed ### M9 — Trustworthy Autonomy (1.0) + - Stronger candidate selection and evidence gating - Durable orchestrate run reports and decision visibility - End-to-end proof of autonomous deploy -> watch -> rollback @@ -329,7 +353,7 @@ Four high-value eval improvements implemented in parallel: 3. 
**Multi-skill conflict resolution.** When two skills compete for the same query, how does selftune decide which should win? This is a description-level problem that may require a separate conflict detector. -4. **Cross-developer signal pooling.** Anonymous aggregate signal from multiple developers could dramatically improve evolution quality. What's the opt-in model and privacy story? *(Partially addressed in M7/0.1.4: `selftune contribute` exports anonymized bundles with two-tier sanitization. Submission is via GitHub issue. Aggregation and ingestion of contributed bundles is future work.)* +4. **Cross-developer signal pooling.** Anonymous aggregate signal from multiple developers could dramatically improve evolution quality. What's the opt-in model and privacy story? _(Partially addressed in M7/0.1.4: `selftune contribute` exports anonymized bundles with two-tier sanitization. Submission is via GitHub issue. Aggregation and ingestion of contributed bundles is future work.)_ 5. **Evaluation of the evaluator.** How do we know the grader is grading correctly? We need meta-evals: known-good and known-bad sessions with ground truth verdicts. @@ -338,13 +362,17 @@ Four high-value eval improvements implemented in parallel: ## Appendix: Log Schema ### `~/.claude/session_telemetry_log.jsonl` + One record per completed session. Fields: `timestamp`, `session_id`, `source` (claude_code / codex / opencode), `cwd`, `transcript_path`, `last_user_query`, `tool_calls`, `total_tool_calls`, `bash_commands`, `skills_triggered`, `assistant_turns`, `errors_encountered`, `transcript_chars`. ### `~/.claude/skill_usage_log.jsonl` + One record per skill trigger event. 
Fields: `timestamp`, `session_id`, `skill_name`, `skill_path`, `query`, `triggered`, `source`, plus optional provenance fields `skill_scope`, `skill_project_root`, `skill_registry_dir`, and `skill_path_resolution_source` when selftune can prove whether the skill came from a project-local, global, admin, or system registry or explain why scope remains unknown. ### `~/.claude/all_queries_log.jsonl` + One record per user query. Fields: `timestamp`, `session_id`, `query`, `source`. ### `grading.json` + Output from the grader. Compatible with skill-creator eval viewer schema. Fields: `session_id`, `skill_name`, `transcript_path`, `graded_at`, `expectations` (each with `score` 0-1 and `source` tag), `summary` (with `mean_score`, `score_std_dev`), `execution_metrics`, `claims`, `eval_feedback`, `failure_feedback`. diff --git a/README.md b/README.md index 63932a9d..f6788ba7 100644 --- a/README.md +++ b/README.md @@ -105,38 +105,38 @@ A continuous feedback loop that makes your skills learn and adapt. Automatically Your agent runs these — you just say what you want ("improve my skills", "show the dashboard"). 
-| Group | Command | What it does | -|-------|---------|-------------| -| | `selftune status` | See which skills are undertriggering and why | -| | `selftune orchestrate` | Run the full autonomous loop (sync → evolve → watch) | -| | `selftune dashboard` | Open the visual skill health dashboard | -| | `selftune doctor` | Health check: logs, hooks, config, permissions | -| **ingest** | `selftune ingest claude` | Backfill from Claude Code transcripts | -| | `selftune ingest codex` | Import Codex rollout logs (experimental) | -| **grade** | `selftune grade --skill ` | Grade a skill session with evidence | -| | `selftune grade baseline --skill ` | Measure skill value vs no-skill baseline | -| **evolve** | `selftune evolve --skill ` | Propose, validate, and deploy improved descriptions | -| | `selftune evolve body --skill ` | Evolve full skill body or routing table | -| | `selftune evolve rollback --skill ` | Rollback a previous evolution | -| **eval** | `selftune eval generate --skill ` | Generate eval sets (`--synthetic` for cold-start) | -| | `selftune eval unit-test --skill ` | Run or generate skill-level unit tests | -| | `selftune eval composability --skill ` | Detect conflicts between co-occurring skills | -| | `selftune eval import` | Import external eval corpus from [SkillsBench](https://github.com/benchflow-ai/skillsbench) | -| **auto** | `selftune cron setup` | Install OS-level scheduling (cron/launchd/systemd) | -| | `selftune watch --skill ` | Monitor after deploy. Auto-rollback on regression. 
| -| **other** | `selftune telemetry` | Manage anonymous usage analytics (status, enable, disable) | -| | `selftune alpha upload` | Run a manual alpha upload cycle and emit a JSON send summary | +| Group | Command | What it does | +| ---------- | -------------------------------------------- | ------------------------------------------------------------------------------------------- | +| | `selftune status` | See which skills are undertriggering and why | +| | `selftune orchestrate` | Run the full autonomous loop (sync → evolve → watch) | +| | `selftune dashboard` | Open the visual skill health dashboard | +| | `selftune doctor` | Health check: logs, hooks, config, permissions | +| **ingest** | `selftune ingest claude` | Backfill from Claude Code transcripts | +| | `selftune ingest codex` | Import Codex rollout logs (experimental) | +| **grade** | `selftune grade --skill ` | Grade a skill session with evidence | +| | `selftune grade baseline --skill ` | Measure skill value vs no-skill baseline | +| **evolve** | `selftune evolve --skill ` | Propose, validate, and deploy improved descriptions | +| | `selftune evolve body --skill ` | Evolve full skill body or routing table | +| | `selftune evolve rollback --skill ` | Rollback a previous evolution | +| **eval** | `selftune eval generate --skill ` | Generate eval sets (`--synthetic` for cold-start) | +| | `selftune eval unit-test --skill ` | Run or generate skill-level unit tests | +| | `selftune eval composability --skill ` | Detect conflicts between co-occurring skills | +| | `selftune eval import` | Import external eval corpus from [SkillsBench](https://github.com/benchflow-ai/skillsbench) | +| **auto** | `selftune cron setup` | Install OS-level scheduling (cron/launchd/systemd) | +| | `selftune watch --skill ` | Monitor after deploy. Auto-rollback on regression. 
| +| **other** | `selftune telemetry` | Manage anonymous usage analytics (status, enable, disable) | +| | `selftune alpha upload` | Run a manual alpha upload cycle and emit a JSON send summary | Full command reference: `selftune --help` ## Why Not Just Rewrite Skills Manually? -| Approach | Problem | -|---|---| -| Rewrite the description yourself | No data on how users actually talk. No validation. No regression detection. | -| Add "ALWAYS invoke when..." directives | Brittle. One agent rewrite away from breaking. | -| Force-load skills on every prompt | Doesn't fix the description. Expensive band-aid. | -| **selftune** | Learns from real usage, rewrites descriptions to match how you work, validates against eval sets, auto-rollbacks on regressions. | +| Approach | Problem | +| -------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | +| Rewrite the description yourself | No data on how users actually talk. No validation. No regression detection. | +| Add "ALWAYS invoke when..." directives | Brittle. One agent rewrite away from breaking. | +| Force-load skills on every prompt | Doesn't fix the description. Expensive band-aid. | +| **selftune** | Learns from real usage, rewrites descriptions to match how you work, validates against eval sets, auto-rollbacks on regressions. | ## Different Layer, Different Problem @@ -144,14 +144,14 @@ LLM observability tools trace API calls. Infrastructure tools monitor servers. N selftune is complementary to these tools, not competitive. They trace what happens inside the LLM. selftune makes sure the right skill is called in the first place. 
-| Dimension | selftune | Langfuse | LangSmith | OpenLIT | -|-----------|----------|----------|-----------|---------| -| **Layer** | Skill-specific | LLM call | Agent trace | Infrastructure | -| **Detects** | Missed triggers, false negatives, skill conflicts | Token usage, latency | Chain failures | System metrics | -| **Improves** | Descriptions, body, and routing automatically | — | — | — | -| **Setup** | Zero deps, zero API keys | Self-host or cloud | Cloud required | Helm chart | -| **Price** | Free (MIT) | Freemium | Paid | Free | -| **Unique** | Self-improving skills + auto-rollback | Prompt management | Evaluations | Dashboards | +| Dimension | selftune | Langfuse | LangSmith | OpenLIT | +| ------------ | ------------------------------------------------- | -------------------- | -------------- | -------------- | +| **Layer** | Skill-specific | LLM call | Agent trace | Infrastructure | +| **Detects** | Missed triggers, false negatives, skill conflicts | Token usage, latency | Chain failures | System metrics | +| **Improves** | Descriptions, body, and routing automatically | — | — | — | +| **Setup** | Zero deps, zero API keys | Self-host or cloud | Cloud required | Helm chart | +| **Price** | Free (MIT) | Freemium | Paid | Free | +| **Unique** | Self-improving skills + auto-rollback | Prompt management | Evaluations | Dashboards | ## Platforms diff --git a/ROADMAP.md b/ROADMAP.md index d4cf9152..544190b3 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,6 +1,7 @@ # selftune Roadmap ## Done + - Two-layer sandbox architecture (local + Docker isolation) - Claude Code sandbox with `claude -p` integration - Replay and contribute commands (v0.7) @@ -19,26 +20,31 @@ - SPA served at `/` as the supported local dashboard ## In Progress + - Multi-agent sandbox expansion ## Planned ### Sandbox Expansion + - Codex sandbox support - OpenCode sandbox support - CI integration (sandbox on every PR) - Fixture expansion with codex/opencode skill profiles ### Badge Showcase + - 
Showcase skill health badges for top community skills in the README - Generate branded SVG badges from real eval results ### Skill Quality Infrastructure + - Auto-evolve mode — skills improve without manual intervention - Marketplace integration — selftune metrics on community skill hubs - Multi-skill conflict detection — identify competing skills for the same query ### Personalization SDK (Vision) + - **SDK for skill creators** — `selftune.config.ts` lets creators declare tunable surfaces (descriptions, workflows, parameters) vs fixed surfaces (core logic, tools) - **Per-user adaptation** — Skills evolve locally to match each user's language and workflow patterns, while preserving the author's canonical version - **Workflow personalization** — Auto-generated multi-skill sequences based on individual usage patterns @@ -46,8 +52,8 @@ ## Agent Support Matrix -| Agent | Ingestor | Local Sandbox | Docker Sandbox | -|-------|----------|---------------|----------------| -| Claude Code | Yes | Yes | Yes | -| Codex | Yes | Planned | Planned | -| OpenCode | Yes | Planned | Planned | +| Agent | Ingestor | Local Sandbox | Docker Sandbox | +| ----------- | -------- | ------------- | -------------- | +| Claude Code | Yes | Yes | Yes | +| Codex | Yes | Planned | Planned | +| OpenCode | Yes | Planned | Planned | diff --git a/SECURITY.md b/SECURITY.md index bc442ae3..6c6f9cba 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -3,7 +3,7 @@ ## Supported Versions | Version | Supported | -|---------|--------------------| +| ------- | ------------------ | | 0.1.x | :white_check_mark: | ## Reporting a Vulnerability diff --git a/apps/local-dashboard/index.html b/apps/local-dashboard/index.html index 4ddc8776..18b19e85 100644 --- a/apps/local-dashboard/index.html +++ b/apps/local-dashboard/index.html @@ -1,13 +1,13 @@ - + - - - - selftune — Dashboard - - - -
- - + + + + selftune — Dashboard + + + +
+ + diff --git a/apps/local-dashboard/package.json b/apps/local-dashboard/package.json index 49fd1eb8..8d7edb0d 100644 --- a/apps/local-dashboard/package.json +++ b/apps/local-dashboard/package.json @@ -1,7 +1,7 @@ { "name": "@selftune/local-dashboard", - "private": true, "version": "0.1.0", + "private": true, "type": "module", "scripts": { "dev": "concurrently \"cd ../.. && bun --watch run cli/selftune/dashboard-server.ts --port 7888 --runtime-mode dev-server\" \"sh -c 'echo \\\"Waiting for dashboard server on localhost:7888...\\\"; i=0; max=150; until curl -fsS http://localhost:7888/api/health >/dev/null 2>&1; do i=$((i+1)); if [ \\\"$i\\\" -ge \\\"$max\\\" ]; then echo \\\"Dashboard server did not become healthy within 30s\\\"; exit 1; fi; sleep 0.2; done; echo \\\"Dashboard server healthy; starting Vite.\\\"; vite --strictPort'\"", @@ -17,6 +17,7 @@ "@dnd-kit/sortable": "^10.0.0", "@dnd-kit/utilities": "^3.2.2", "@fontsource-variable/geist": "^5.2.8", + "@selftune/ui": "workspace:*", "@tanstack/react-query": "^5.90.21", "@tanstack/react-table": "^8.21.3", "class-variance-authority": "^0.7.1", @@ -32,14 +33,13 @@ "tailwind-merge": "^3.5.0", "tw-animate-css": "^1.4.0", "vaul": "^1.1.2", - "zod": "^4.3.6", - "@selftune/ui": "workspace:*" + "zod": "^4.3.6" }, "devDependencies": { "@selftune/telemetry-contract": "workspace:*", + "@tailwindcss/vite": "^4.2.1", "@types/react": "^19.1.6", "@types/react-dom": "^19.1.6", - "@tailwindcss/vite": "^4.2.1", "@vitejs/plugin-react": "^4.5.2", "concurrently": "^9.1.2", "shadcn": "^4.0.5", diff --git a/apps/local-dashboard/src/App.tsx b/apps/local-dashboard/src/App.tsx index eabed16f..0bdc2ec3 100644 --- a/apps/local-dashboard/src/App.tsx +++ b/apps/local-dashboard/src/App.tsx @@ -1,19 +1,20 @@ -import { useMemo, useState } from "react" -import { BrowserRouter, Route, Routes } from "react-router-dom" -import { QueryClient, QueryClientProvider } from "@tanstack/react-query" -import { AppSidebar } from "@/components/app-sidebar" 
-import { SiteHeader } from "@/components/site-header" -import { ThemeProvider } from "@/components/theme-provider" -import { SidebarInset, SidebarProvider } from "@/components/ui/sidebar" -import { TooltipProvider } from "@selftune/ui/primitives" -import { Overview } from "@/pages/Overview" -import { SkillReport } from "@/pages/SkillReport" -import { Status } from "@/pages/Status" -import { useOverview } from "@/hooks/useOverview" -import { RuntimeFooter } from "@/components/runtime-footer" -import { useSSE } from "@/hooks/useSSE" -import type { SkillHealthStatus, SkillSummary } from "@/types" -import { deriveStatus, sortByPassRateAndChecks } from "@selftune/ui/lib" +import { deriveStatus, sortByPassRateAndChecks } from "@selftune/ui/lib"; +import { TooltipProvider } from "@selftune/ui/primitives"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { useMemo, useState } from "react"; +import { BrowserRouter, Route, Routes } from "react-router-dom"; + +import { AppSidebar } from "@/components/app-sidebar"; +import { RuntimeFooter } from "@/components/runtime-footer"; +import { SiteHeader } from "@/components/site-header"; +import { ThemeProvider } from "@/components/theme-provider"; +import { SidebarInset, SidebarProvider } from "@/components/ui/sidebar"; +import { useOverview } from "@/hooks/useOverview"; +import { useSSE } from "@/hooks/useSSE"; +import { Overview } from "@/pages/Overview"; +import { SkillReport } from "@/pages/SkillReport"; +import { Status } from "@/pages/Status"; +import type { SkillHealthStatus, SkillSummary } from "@/types"; const queryClient = new QueryClient({ defaultOptions: { @@ -22,7 +23,7 @@ const queryClient = new QueryClient({ gcTime: 5 * 60 * 1000, }, }, -}) +}); function SkillReportWithHeader() { return ( @@ -30,7 +31,7 @@ function SkillReportWithHeader() { - ) + ); } function StatusWithHeader() { @@ -39,18 +40,18 @@ function StatusWithHeader() { - ) + ); } function DashboardShell() { - useSSE() - 
const [search, setSearch] = useState("") - const [statusFilter, setStatusFilter] = useState("ALL") - const overviewQuery = useOverview() - const { data } = overviewQuery + useSSE(); + const [search, setSearch] = useState(""); + const [statusFilter, setStatusFilter] = useState("ALL"); + const overviewQuery = useOverview(); + const { data } = overviewQuery; const skillNavItems = useMemo(() => { - if (!data) return [] + if (!data) return []; return sortByPassRateAndChecks( data.skills.map((s: SkillSummary) => ({ name: s.skill_name, @@ -58,15 +59,15 @@ function DashboardShell() { status: deriveStatus(s.pass_rate, s.total_checks), passRate: s.total_checks > 0 ? s.pass_rate : null, checks: s.total_checks, - })) - ) - }, [data]) + })), + ); + }, [data]); const filteredNavItems = useMemo(() => { - if (!search) return skillNavItems - const q = search.toLowerCase() - return skillNavItems.filter((s) => s.name.toLowerCase().includes(q)) - }, [skillNavItems, search]) + if (!search) return skillNavItems; + const q = search.toLowerCase(); + return skillNavItems.filter((s) => s.name.toLowerCase().includes(q)); + }, [skillNavItems, search]); return ( @@ -83,7 +84,12 @@ function DashboardShell() { element={ <> - + } /> @@ -93,7 +99,7 @@ function DashboardShell() { - ) + ); } export function App() { @@ -107,5 +113,5 @@ export function App() { - ) + ); } diff --git a/apps/local-dashboard/src/components/app-sidebar.tsx b/apps/local-dashboard/src/components/app-sidebar.tsx index ce013df1..4921d201 100644 --- a/apps/local-dashboard/src/components/app-sidebar.tsx +++ b/apps/local-dashboard/src/components/app-sidebar.tsx @@ -1,27 +1,10 @@ -import { useEffect, useMemo, useState } from "react" -import { Link, useLocation } from "react-router-dom" +import { formatRate } from "@selftune/ui/lib"; import { Badge, Collapsible, CollapsibleContent, CollapsibleTrigger, -} from "@selftune/ui/primitives" -import { Input } from "@/components/ui/input" -import { - Sidebar, - SidebarContent, - 
SidebarFooter, - SidebarGroup, - SidebarGroupContent, - SidebarGroupLabel, - SidebarHeader, - SidebarMenu, - SidebarMenuButton, - SidebarMenuItem, - SidebarMenuSub, - SidebarMenuSubButton, - SidebarMenuSubItem, -} from "@/components/ui/sidebar" +} from "@selftune/ui/primitives"; import { ActivityIcon, AlertTriangleIcon, @@ -36,16 +19,34 @@ import { SearchIcon, ServerIcon, XCircleIcon, -} from "lucide-react" -import { formatRate } from "@selftune/ui/lib" -import type { SkillHealthStatus } from "@/types" +} from "lucide-react"; +import { useEffect, useMemo, useState } from "react"; +import { Link, useLocation } from "react-router-dom"; + +import { Input } from "@/components/ui/input"; +import { + Sidebar, + SidebarContent, + SidebarFooter, + SidebarGroup, + SidebarGroupContent, + SidebarGroupLabel, + SidebarHeader, + SidebarMenu, + SidebarMenuButton, + SidebarMenuItem, + SidebarMenuSub, + SidebarMenuSubButton, + SidebarMenuSubItem, +} from "@/components/ui/sidebar"; +import type { SkillHealthStatus } from "@/types"; interface SkillNavItem { - name: string - scope: string | null - status: SkillHealthStatus - passRate: number | null - checks: number + name: string; + scope: string | null; + status: SkillHealthStatus; + passRate: number | null; + checks: number; } const STATUS_ICON: Record = { @@ -54,7 +55,7 @@ const STATUS_ICON: Record = { CRITICAL: , UNGRADED: , UNKNOWN: , -} +}; const SCOPE_CONFIG: Record = { project: { label: "Project", icon: }, @@ -62,7 +63,7 @@ const SCOPE_CONFIG: Record = { system: { label: "System", icon: }, admin: { label: "Admin", icon: }, unknown: { label: "Unknown", icon: }, -} +}; function ScopeGroup({ scope, @@ -70,18 +71,18 @@ function ScopeGroup({ pathname, defaultOpen, }: { - scope: string - skills: SkillNavItem[] - pathname: string - defaultOpen: boolean + scope: string; + skills: SkillNavItem[]; + pathname: string; + defaultOpen: boolean; }) { - const config = SCOPE_CONFIG[scope] ?? 
{ label: scope, icon: } - const hasActive = skills.some((s) => pathname === `/skills/${encodeURIComponent(s.name)}`) - const [open, setOpen] = useState(defaultOpen || hasActive) + const config = SCOPE_CONFIG[scope] ?? { label: scope, icon: }; + const hasActive = skills.some((s) => pathname === `/skills/${encodeURIComponent(s.name)}`); + const [open, setOpen] = useState(defaultOpen || hasActive); useEffect(() => { - if (hasActive) setOpen(true) - }, [hasActive]) + if (hasActive) setOpen(true); + }, [hasActive]); return ( @@ -97,7 +98,7 @@ function ScopeGroup({ {skills.map((skill) => { - const isActive = pathname === `/skills/${encodeURIComponent(skill.name)}` + const isActive = pathname === `/skills/${encodeURIComponent(skill.name)}`; return ( {skill.name} @@ -118,13 +121,13 @@ function ScopeGroup({ - ) + ); })} - ) + ); } export function AppSidebar({ @@ -134,33 +137,33 @@ export function AppSidebar({ version, ...props }: React.ComponentProps & { - skills: SkillNavItem[] - search: string - onSearchChange: (v: string) => void - version?: string + skills: SkillNavItem[]; + search: string; + onSearchChange: (v: string) => void; + version?: string; }) { - const location = useLocation() + const location = useLocation(); const scopeGroups = useMemo(() => { - const groups: Record = {} + const groups: Record = {}; for (const skill of skills) { - const key = skill.scope ?? "unknown" - if (!groups[key]) groups[key] = [] - groups[key].push(skill) + const key = skill.scope ?? 
"unknown"; + if (!groups[key]) groups[key] = []; + groups[key].push(skill); } // Sort: global first, then project, then known scopes, then any unexpected ones - const order = ["global", "project", "system", "admin", "unknown"] + const order = ["global", "project", "system", "admin", "unknown"]; const ordered = order .filter((k) => groups[k]?.length) - .map((k) => ({ scope: k, skills: groups[k] })) + .map((k) => ({ scope: k, skills: groups[k] })); const remaining = Object.keys(groups) .filter((k) => !order.includes(k)) .sort() - .map((k) => ({ scope: k, skills: groups[k] })) - return [...ordered, ...remaining] - }, [skills]) + .map((k) => ({ scope: k, skills: groups[k] })); + return [...ordered, ...remaining]; + }, [skills]); - const hasMultipleScopes = scopeGroups.length > 1 + const hasMultipleScopes = scopeGroups.length > 1; return ( @@ -173,7 +176,10 @@ export function AppSidebar({ > - ) + ); } diff --git a/apps/local-dashboard/src/components/site-header.tsx b/apps/local-dashboard/src/components/site-header.tsx index 32d78d93..437d0466 100644 --- a/apps/local-dashboard/src/components/site-header.tsx +++ b/apps/local-dashboard/src/components/site-header.tsx @@ -1,6 +1,6 @@ -import { SidebarTrigger } from "@/components/ui/sidebar" -import { Separator } from "@/components/ui/separator" -import { ThemeToggle } from "@/components/theme-toggle" +import { ThemeToggle } from "@/components/theme-toggle"; +import { Separator } from "@/components/ui/separator"; +import { SidebarTrigger } from "@/components/ui/sidebar"; export function SiteHeader() { return ( @@ -13,5 +13,5 @@ export function SiteHeader() { - ) + ); } diff --git a/apps/local-dashboard/src/components/theme-provider.tsx b/apps/local-dashboard/src/components/theme-provider.tsx index 0fe66f49..7bc8b164 100644 --- a/apps/local-dashboard/src/components/theme-provider.tsx +++ b/apps/local-dashboard/src/components/theme-provider.tsx @@ -1,66 +1,66 @@ -import { createContext, useContext, useEffect, useState, type 
ReactNode } from "react" +import { createContext, useContext, useEffect, useState, type ReactNode } from "react"; -type Theme = "dark" | "light" | "system" +type Theme = "dark" | "light" | "system"; interface ThemeProviderState { - theme: Theme - setTheme: (theme: Theme) => void + theme: Theme; + setTheme: (theme: Theme) => void; } -const ThemeProviderContext = createContext(undefined) +const ThemeProviderContext = createContext(undefined); -const STORAGE_KEY = "selftune-theme" -const VALID_THEMES: Theme[] = ["dark", "light", "system"] +const STORAGE_KEY = "selftune-theme"; +const VALID_THEMES: Theme[] = ["dark", "light", "system"]; function readStoredTheme(defaultTheme: Theme): Theme { - const raw = localStorage.getItem(STORAGE_KEY) - return VALID_THEMES.includes(raw as Theme) ? (raw as Theme) : defaultTheme + const raw = localStorage.getItem(STORAGE_KEY); + return VALID_THEMES.includes(raw as Theme) ? (raw as Theme) : defaultTheme; } export function ThemeProvider({ children, defaultTheme = "dark", }: { - children: ReactNode - defaultTheme?: Theme + children: ReactNode; + defaultTheme?: Theme; }) { - const [theme, setTheme] = useState(() => readStoredTheme(defaultTheme)) + const [theme, setTheme] = useState(() => readStoredTheme(defaultTheme)); useEffect(() => { - const root = window.document.documentElement - const mediaQuery = window.matchMedia("(prefers-color-scheme: dark)") + const root = window.document.documentElement; + const mediaQuery = window.matchMedia("(prefers-color-scheme: dark)"); const applyTheme = (next: "dark" | "light") => { - root.classList.remove("light", "dark") - root.classList.add(next) - } + root.classList.remove("light", "dark"); + root.classList.add(next); + }; if (theme === "system") { - const applySystemTheme = () => applyTheme(mediaQuery.matches ? 
"dark" : "light") - applySystemTheme() - mediaQuery.addEventListener("change", applySystemTheme) - return () => mediaQuery.removeEventListener("change", applySystemTheme) + const applySystemTheme = () => applyTheme(mediaQuery.matches ? "dark" : "light"); + applySystemTheme(); + mediaQuery.addEventListener("change", applySystemTheme); + return () => mediaQuery.removeEventListener("change", applySystemTheme); } - applyTheme(theme) - }, [theme]) + applyTheme(theme); + }, [theme]); return ( { - localStorage.setItem(STORAGE_KEY, t) - setTheme(t) + localStorage.setItem(STORAGE_KEY, t); + setTheme(t); }, }} > {children} - ) + ); } export function useTheme() { - const context = useContext(ThemeProviderContext) - if (context === undefined) throw new Error("useTheme must be used within a ThemeProvider") - return context + const context = useContext(ThemeProviderContext); + if (context === undefined) throw new Error("useTheme must be used within a ThemeProvider"); + return context; } diff --git a/apps/local-dashboard/src/components/theme-toggle.tsx b/apps/local-dashboard/src/components/theme-toggle.tsx index d10e2f2a..fdd198ba 100644 --- a/apps/local-dashboard/src/components/theme-toggle.tsx +++ b/apps/local-dashboard/src/components/theme-toggle.tsx @@ -1,9 +1,10 @@ -import { MoonIcon, SunIcon } from "lucide-react" -import { Button } from "@selftune/ui/primitives" -import { useTheme } from "@/components/theme-provider" +import { Button } from "@selftune/ui/primitives"; +import { MoonIcon, SunIcon } from "lucide-react"; + +import { useTheme } from "@/components/theme-provider"; export function ThemeToggle() { - const { theme, setTheme } = useTheme() + const { theme, setTheme } = useTheme(); return ( - ) + ); } diff --git a/apps/local-dashboard/src/components/ui/avatar.tsx b/apps/local-dashboard/src/components/ui/avatar.tsx index e4fed865..9f9660ae 100644 --- a/apps/local-dashboard/src/components/ui/avatar.tsx +++ b/apps/local-dashboard/src/components/ui/avatar.tsx @@ -1,16 
+1,16 @@ -"use client" +"use client"; -import * as React from "react" -import { Avatar as AvatarPrimitive } from "@base-ui/react/avatar" +import { Avatar as AvatarPrimitive } from "@base-ui/react/avatar"; +import * as React from "react"; -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils"; function Avatar({ className, size = "default", ...props }: AvatarPrimitive.Root.Props & { - size?: "default" | "sm" | "lg" + size?: "default" | "sm" | "lg"; }) { return ( - ) + ); } function AvatarImage({ className, ...props }: AvatarPrimitive.Image.Props) { return ( - ) + ); } -function AvatarFallback({ - className, - ...props -}: AvatarPrimitive.Fallback.Props) { +function AvatarFallback({ className, ...props }: AvatarPrimitive.Fallback.Props) { return ( - ) + ); } function AvatarBadge({ className, ...props }: React.ComponentProps<"span">) { @@ -63,11 +57,11 @@ function AvatarBadge({ className, ...props }: React.ComponentProps<"span">) { "group-data-[size=sm]/avatar:size-2 group-data-[size=sm]/avatar:[&>svg]:hidden", "group-data-[size=default]/avatar:size-2.5 group-data-[size=default]/avatar:[&>svg]:size-2", "group-data-[size=lg]/avatar:size-3 group-data-[size=lg]/avatar:[&>svg]:size-2", - className + className, )} {...props} /> - ) + ); } function AvatarGroup({ className, ...props }: React.ComponentProps<"div">) { @@ -76,34 +70,24 @@ function AvatarGroup({ className, ...props }: React.ComponentProps<"div">) { data-slot="avatar-group" className={cn( "group/avatar-group flex -space-x-2 *:data-[slot=avatar]:ring-2 *:data-[slot=avatar]:ring-background", - className + className, )} {...props} /> - ) + ); } -function AvatarGroupCount({ - className, - ...props -}: React.ComponentProps<"div">) { +function AvatarGroupCount({ className, ...props }: React.ComponentProps<"div">) { return (
svg]:size-4 group-has-data-[size=lg]/avatar-group:[&>svg]:size-5 group-has-data-[size=sm]/avatar-group:[&>svg]:size-3", - className + className, )} {...props} /> - ) + ); } -export { - Avatar, - AvatarImage, - AvatarFallback, - AvatarGroup, - AvatarGroupCount, - AvatarBadge, -} +export { Avatar, AvatarImage, AvatarFallback, AvatarGroup, AvatarGroupCount, AvatarBadge }; diff --git a/apps/local-dashboard/src/components/ui/breadcrumb.tsx b/apps/local-dashboard/src/components/ui/breadcrumb.tsx index 3d85c18d..b6381016 100644 --- a/apps/local-dashboard/src/components/ui/breadcrumb.tsx +++ b/apps/local-dashboard/src/components/ui/breadcrumb.tsx @@ -1,19 +1,14 @@ -import * as React from "react" -import { mergeProps } from "@base-ui/react/merge-props" -import { useRender } from "@base-ui/react/use-render" +import { mergeProps } from "@base-ui/react/merge-props"; +import { useRender } from "@base-ui/react/use-render"; +import { ChevronRightIcon, MoreHorizontalIcon } from "lucide-react"; +import * as React from "react"; -import { cn } from "@/lib/utils" -import { ChevronRightIcon, MoreHorizontalIcon } from "lucide-react" +import { cn } from "@/lib/utils"; function Breadcrumb({ className, ...props }: React.ComponentProps<"nav">) { return ( -
- ) + ); } const ChartStyle = ({ id, config }: { id: string; config: ChartConfig }) => { - const colorConfig = Object.entries(config).filter( - ([, config]) => config.theme || config.color - ) + const colorConfig = Object.entries(config).filter(([, config]) => config.theme || config.color); if (!colorConfig.length) { - return null + return null; } return ( @@ -85,22 +79,20 @@ const ChartStyle = ({ id, config }: { id: string; config: ChartConfig }) => { ${prefix} [data-chart="${id}"] { ${colorConfig .map(([key, itemConfig]) => { - const color = - itemConfig.theme?.[theme as keyof typeof itemConfig.theme] || - itemConfig.color - return color ? ` --color-${key}: ${color};` : null + const color = itemConfig.theme?.[theme as keyof typeof itemConfig.theme] || itemConfig.color; + return color ? ` --color-${key}: ${color};` : null; }) .join("\n")} } -` +`, ) .join("\n"), }} /> - ) -} + ); +}; -const ChartTooltip = RechartsPrimitive.Tooltip +const ChartTooltip = RechartsPrimitive.Tooltip; function ChartTooltipContent({ active, @@ -118,61 +110,51 @@ function ChartTooltipContent({ labelKey, }: React.ComponentProps & React.ComponentProps<"div"> & { - hideLabel?: boolean - hideIndicator?: boolean - indicator?: "line" | "dot" | "dashed" - nameKey?: string - labelKey?: string + hideLabel?: boolean; + hideIndicator?: boolean; + indicator?: "line" | "dot" | "dashed"; + nameKey?: string; + labelKey?: string; }) { - const { config } = useChart() + const { config } = useChart(); const tooltipLabel = React.useMemo(() => { if (hideLabel || !payload?.length) { - return null + return null; } - const [item] = payload - const key = `${labelKey || item?.dataKey || item?.name || "value"}` - const itemConfig = getPayloadConfigFromPayload(config, item, key) + const [item] = payload; + const key = `${labelKey || item?.dataKey || item?.name || "value"}`; + const itemConfig = getPayloadConfigFromPayload(config, item, key); const value = !labelKey && typeof label === "string" ? 
config[label as keyof typeof config]?.label || label - : itemConfig?.label + : itemConfig?.label; if (labelFormatter) { return ( -
- {labelFormatter(value, payload)} -
- ) +
{labelFormatter(value, payload)}
+ ); } if (!value) { - return null + return null; } - return
{value}
- }, [ - label, - labelFormatter, - payload, - hideLabel, - labelClassName, - config, - labelKey, - ]) + return
{value}
; + }, [label, labelFormatter, payload, hideLabel, labelClassName, config, labelKey]); if (!active || !payload?.length) { - return null + return null; } - const nestLabel = payload.length === 1 && indicator !== "dot" + const nestLabel = payload.length === 1 && indicator !== "dot"; return (
{!nestLabel ? tooltipLabel : null} @@ -180,16 +162,16 @@ function ChartTooltipContent({ {payload .filter((item) => item.type !== "none") .map((item, index) => { - const key = `${nameKey || item.name || item.dataKey || "value"}` - const itemConfig = getPayloadConfigFromPayload(config, item, key) - const indicatorColor = color || item.payload.fill || item.color + const key = `${nameKey || item.name || item.dataKey || "value"}`; + const itemConfig = getPayloadConfigFromPayload(config, item, key); + const indicatorColor = color || item.payload.fill || item.color; return (
svg]:h-2.5 [&>svg]:w-2.5 [&>svg]:text-muted-foreground", - indicator === "dot" && "items-center" + indicator === "dot" && "items-center", )} > {formatter && item?.value !== undefined && item.name ? ( @@ -209,7 +191,7 @@ function ChartTooltipContent({ "w-0 border-[1.5px] border-dashed bg-transparent": indicator === "dashed", "my-0.5": nestLabel && indicator === "dashed", - } + }, )} style={ { @@ -223,7 +205,7 @@ function ChartTooltipContent({
@@ -241,14 +223,14 @@ function ChartTooltipContent({ )}
- ) + ); })}
- ) + ); } -const ChartLegend = RechartsPrimitive.Legend +const ChartLegend = RechartsPrimitive.Legend; function ChartLegendContent({ className, @@ -258,13 +240,13 @@ function ChartLegendContent({ nameKey, }: React.ComponentProps<"div"> & Pick & { - hideIcon?: boolean - nameKey?: string + hideIcon?: boolean; + nameKey?: string; }) { - const { config } = useChart() + const { config } = useChart(); if (!payload?.length) { - return null + return null; } return ( @@ -272,20 +254,20 @@ function ChartLegendContent({ className={cn( "flex items-center justify-center gap-4", verticalAlign === "top" ? "pb-3" : "pt-3", - className + className, )} > {payload .filter((item) => item.type !== "none") .map((item, index) => { - const key = `${nameKey || item.dataKey || "value"}` - const itemConfig = getPayloadConfigFromPayload(config, item, key) + const key = `${nameKey || item.dataKey || "value"}`; + const itemConfig = getPayloadConfigFromPayload(config, item, key); return (
svg]:h-3 [&>svg]:w-3 [&>svg]:text-muted-foreground" + "flex items-center gap-1.5 [&>svg]:h-3 [&>svg]:w-3 [&>svg]:text-muted-foreground", )} > {itemConfig?.icon && !hideIcon ? ( @@ -300,48 +282,35 @@ function ChartLegendContent({ )} {itemConfig?.label}
- ) + ); })}
- ) + ); } -function getPayloadConfigFromPayload( - config: ChartConfig, - payload: unknown, - key: string -) { +function getPayloadConfigFromPayload(config: ChartConfig, payload: unknown, key: string) { if (typeof payload !== "object" || payload === null) { - return undefined + return undefined; } const payloadPayload = - "payload" in payload && - typeof payload.payload === "object" && - payload.payload !== null + "payload" in payload && typeof payload.payload === "object" && payload.payload !== null ? payload.payload - : undefined + : undefined; - let configLabelKey: string = key + let configLabelKey: string = key; - if ( - key in payload && - typeof payload[key as keyof typeof payload] === "string" - ) { - configLabelKey = payload[key as keyof typeof payload] as string + if (key in payload && typeof payload[key as keyof typeof payload] === "string") { + configLabelKey = payload[key as keyof typeof payload] as string; } else if ( payloadPayload && key in payloadPayload && typeof payloadPayload[key as keyof typeof payloadPayload] === "string" ) { - configLabelKey = payloadPayload[ - key as keyof typeof payloadPayload - ] as string + configLabelKey = payloadPayload[key as keyof typeof payloadPayload] as string; } - return configLabelKey in config - ? config[configLabelKey] - : config[key as keyof typeof config] + return configLabelKey in config ? 
config[configLabelKey] : config[key as keyof typeof config]; } export { @@ -351,4 +320,4 @@ export { ChartLegend, ChartLegendContent, ChartStyle, -} +}; diff --git a/apps/local-dashboard/src/components/ui/drawer.tsx b/apps/local-dashboard/src/components/ui/drawer.tsx index 839e4e50..de1a8b31 100644 --- a/apps/local-dashboard/src/components/ui/drawer.tsx +++ b/apps/local-dashboard/src/components/ui/drawer.tsx @@ -1,30 +1,22 @@ -import * as React from "react" -import { Drawer as DrawerPrimitive } from "vaul" +import * as React from "react"; +import { Drawer as DrawerPrimitive } from "vaul"; -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils"; -function Drawer({ - ...props -}: React.ComponentProps) { - return +function Drawer({ ...props }: React.ComponentProps) { + return ; } -function DrawerTrigger({ - ...props -}: React.ComponentProps) { - return +function DrawerTrigger({ ...props }: React.ComponentProps) { + return ; } -function DrawerPortal({ - ...props -}: React.ComponentProps) { - return +function DrawerPortal({ ...props }: React.ComponentProps) { + return ; } -function DrawerClose({ - ...props -}: React.ComponentProps) { - return +function DrawerClose({ ...props }: React.ComponentProps) { + return ; } function DrawerOverlay({ @@ -36,11 +28,11 @@ function DrawerOverlay({ data-slot="drawer-overlay" className={cn( "fixed inset-0 z-50 bg-black/10 supports-backdrop-filter:backdrop-blur-xs data-open:animate-in data-open:fade-in-0 data-closed:animate-out data-closed:fade-out-0", - className + className, )} {...props} /> - ) + ); } function DrawerContent({ @@ -55,7 +47,7 @@ function DrawerContent({ data-slot="drawer-content" className={cn( "group/drawer-content fixed z-50 flex h-auto flex-col bg-background text-sm data-[vaul-drawer-direction=bottom]:inset-x-0 data-[vaul-drawer-direction=bottom]:bottom-0 data-[vaul-drawer-direction=bottom]:mt-24 data-[vaul-drawer-direction=bottom]:max-h-[80vh] data-[vaul-drawer-direction=bottom]:rounded-t-xl 
data-[vaul-drawer-direction=bottom]:border-t data-[vaul-drawer-direction=left]:inset-y-0 data-[vaul-drawer-direction=left]:left-0 data-[vaul-drawer-direction=left]:w-3/4 data-[vaul-drawer-direction=left]:rounded-r-xl data-[vaul-drawer-direction=left]:border-r data-[vaul-drawer-direction=right]:inset-y-0 data-[vaul-drawer-direction=right]:right-0 data-[vaul-drawer-direction=right]:w-3/4 data-[vaul-drawer-direction=right]:rounded-l-xl data-[vaul-drawer-direction=right]:border-l data-[vaul-drawer-direction=top]:inset-x-0 data-[vaul-drawer-direction=top]:top-0 data-[vaul-drawer-direction=top]:mb-24 data-[vaul-drawer-direction=top]:max-h-[80vh] data-[vaul-drawer-direction=top]:rounded-b-xl data-[vaul-drawer-direction=top]:border-b data-[vaul-drawer-direction=left]:sm:max-w-sm data-[vaul-drawer-direction=right]:sm:max-w-sm", - className + className, )} {...props} > @@ -63,7 +55,7 @@ function DrawerContent({ {children} - ) + ); } function DrawerHeader({ className, ...props }: React.ComponentProps<"div">) { @@ -72,11 +64,11 @@ function DrawerHeader({ className, ...props }: React.ComponentProps<"div">) { data-slot="drawer-header" className={cn( "flex flex-col gap-0.5 p-4 group-data-[vaul-drawer-direction=bottom]/drawer-content:text-center group-data-[vaul-drawer-direction=top]/drawer-content:text-center md:gap-0.5 md:text-left", - className + className, )} {...props} /> - ) + ); } function DrawerFooter({ className, ...props }: React.ComponentProps<"div">) { @@ -86,20 +78,17 @@ function DrawerFooter({ className, ...props }: React.ComponentProps<"div">) { className={cn("mt-auto flex flex-col gap-2 p-4", className)} {...props} /> - ) + ); } -function DrawerTitle({ - className, - ...props -}: React.ComponentProps) { +function DrawerTitle({ className, ...props }: React.ComponentProps) { return ( - ) + ); } function DrawerDescription({ @@ -112,7 +101,7 @@ function DrawerDescription({ className={cn("text-sm text-muted-foreground", className)} {...props} /> - ) + ); } export { @@ 
-126,4 +115,4 @@ export { DrawerFooter, DrawerTitle, DrawerDescription, -} +}; diff --git a/apps/local-dashboard/src/components/ui/input.tsx b/apps/local-dashboard/src/components/ui/input.tsx index 7d21babb..a30bd44a 100644 --- a/apps/local-dashboard/src/components/ui/input.tsx +++ b/apps/local-dashboard/src/components/ui/input.tsx @@ -1,7 +1,7 @@ -import * as React from "react" -import { Input as InputPrimitive } from "@base-ui/react/input" +import { Input as InputPrimitive } from "@base-ui/react/input"; +import * as React from "react"; -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils"; function Input({ className, type, ...props }: React.ComponentProps<"input">) { return ( @@ -10,11 +10,11 @@ function Input({ className, type, ...props }: React.ComponentProps<"input">) { data-slot="input" className={cn( "h-8 w-full min-w-0 rounded-lg border border-input bg-transparent px-2.5 py-1 text-base transition-colors outline-none file:inline-flex file:h-6 file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:border-ring focus-visible:ring-3 focus-visible:ring-ring/50 disabled:pointer-events-none disabled:cursor-not-allowed disabled:bg-input/50 disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-3 aria-invalid:ring-destructive/20 md:text-sm dark:bg-input/30 dark:disabled:bg-input/80 dark:aria-invalid:border-destructive/50 dark:aria-invalid:ring-destructive/40", - className + className, )} {...props} /> - ) + ); } -export { Input } +export { Input }; diff --git a/apps/local-dashboard/src/components/ui/separator.tsx b/apps/local-dashboard/src/components/ui/separator.tsx index 6e1369e4..f2c55c81 100644 --- a/apps/local-dashboard/src/components/ui/separator.tsx +++ b/apps/local-dashboard/src/components/ui/separator.tsx @@ -1,25 +1,21 @@ -"use client" +"use client"; -import { Separator as SeparatorPrimitive } from "@base-ui/react/separator" +import { Separator as 
SeparatorPrimitive } from "@base-ui/react/separator"; -import { cn } from "@/lib/utils" +import { cn } from "@/lib/utils"; -function Separator({ - className, - orientation = "horizontal", - ...props -}: SeparatorPrimitive.Props) { +function Separator({ className, orientation = "horizontal", ...props }: SeparatorPrimitive.Props) { return ( - ) + ); } -export { Separator } +export { Separator }; diff --git a/apps/local-dashboard/src/components/ui/sheet.tsx b/apps/local-dashboard/src/components/ui/sheet.tsx index 331c8b78..5c67994e 100644 --- a/apps/local-dashboard/src/components/ui/sheet.tsx +++ b/apps/local-dashboard/src/components/ui/sheet.tsx @@ -1,26 +1,26 @@ -"use client" +"use client"; -import * as React from "react" -import { Dialog as SheetPrimitive } from "@base-ui/react/dialog" +import { Dialog as SheetPrimitive } from "@base-ui/react/dialog"; +import { Button } from "@selftune/ui/primitives"; +import { XIcon } from "lucide-react"; +import * as React from "react"; -import { cn } from "@/lib/utils" -import { Button } from "@selftune/ui/primitives" -import { XIcon } from "lucide-react" +import { cn } from "@/lib/utils"; function Sheet({ ...props }: SheetPrimitive.Root.Props) { - return + return ; } function SheetTrigger({ ...props }: SheetPrimitive.Trigger.Props) { - return + return ; } function SheetClose({ ...props }: SheetPrimitive.Close.Props) { - return + return ; } function SheetPortal({ ...props }: SheetPrimitive.Portal.Props) { - return + return ; } function SheetOverlay({ className, ...props }: SheetPrimitive.Backdrop.Props) { @@ -29,11 +29,11 @@ function SheetOverlay({ className, ...props }: SheetPrimitive.Backdrop.Props) { data-slot="sheet-overlay" className={cn( "fixed inset-0 z-50 bg-black/10 transition-opacity duration-150 data-ending-style:opacity-0 data-starting-style:opacity-0 supports-backdrop-filter:backdrop-blur-xs", - className + className, )} {...props} /> - ) + ); } function SheetContent({ @@ -43,8 +43,8 @@ function SheetContent({ 
showCloseButton = true, ...props }: SheetPrimitive.Popup.Props & { - side?: "top" | "right" | "bottom" | "left" - showCloseButton?: boolean + side?: "top" | "right" | "bottom" | "left"; + showCloseButton?: boolean; }) { return ( @@ -54,7 +54,7 @@ function SheetContent({ data-side={side} className={cn( "fixed z-50 flex flex-col gap-4 bg-background bg-clip-padding text-sm shadow-lg transition duration-200 ease-in-out data-ending-style:opacity-0 data-starting-style:opacity-0 data-[side=bottom]:inset-x-0 data-[side=bottom]:bottom-0 data-[side=bottom]:h-auto data-[side=bottom]:border-t data-[side=bottom]:data-ending-style:translate-y-[2.5rem] data-[side=bottom]:data-starting-style:translate-y-[2.5rem] data-[side=left]:inset-y-0 data-[side=left]:left-0 data-[side=left]:h-full data-[side=left]:w-3/4 data-[side=left]:border-r data-[side=left]:data-ending-style:translate-x-[-2.5rem] data-[side=left]:data-starting-style:translate-x-[-2.5rem] data-[side=right]:inset-y-0 data-[side=right]:right-0 data-[side=right]:h-full data-[side=right]:w-3/4 data-[side=right]:border-l data-[side=right]:data-ending-style:translate-x-[2.5rem] data-[side=right]:data-starting-style:translate-x-[2.5rem] data-[side=top]:inset-x-0 data-[side=top]:top-0 data-[side=top]:h-auto data-[side=top]:border-b data-[side=top]:data-ending-style:translate-y-[-2.5rem] data-[side=top]:data-starting-style:translate-y-[-2.5rem] data-[side=left]:sm:max-w-sm data-[side=right]:sm:max-w-sm", - className + className, )} {...props} > @@ -62,22 +62,15 @@ function SheetContent({ {showCloseButton && ( - } + render={ - ) + ); } function SidebarRail({ className, ...props }: React.ComponentProps<"button">) { - const { toggleSidebar } = useSidebar() + const { toggleSidebar } = useSidebar(); return ( - ) + ); } export function Overview({ @@ -119,28 +136,28 @@ export function Overview({ onStatusFilterChange, overviewQuery, }: { - search: string - statusFilter: SkillHealthStatus | "ALL" - onStatusFilterChange: (v: 
SkillHealthStatus | "ALL") => void - overviewQuery: UseQueryResult + search: string; + statusFilter: SkillHealthStatus | "ALL"; + onStatusFilterChange: (v: SkillHealthStatus | "ALL") => void; + overviewQuery: UseQueryResult; }) { - const navigate = useNavigate() - const { data, isPending, isError, error, refetch } = overviewQuery - const orchestrateQuery = useOrchestrateRuns() + const navigate = useNavigate(); + const { data, isPending, isError, error, refetch } = overviewQuery; + const orchestrateQuery = useOrchestrateRuns(); - const cards = useMemo(() => (data ? deriveSkillCards(data.skills) : []), [data]) + const cards = useMemo(() => (data ? deriveSkillCards(data.skills) : []), [data]); const filteredCards = useMemo(() => { - let result = cards + let result = cards; if (search) { - const q = search.toLowerCase() - result = result.filter((c) => c.name.toLowerCase().includes(q)) + const q = search.toLowerCase(); + result = result.filter((c) => c.name.toLowerCase().includes(q)); } if (statusFilter !== "ALL") { - result = result.filter((c) => c.status === statusFilter) + result = result.filter((c) => c.status === statusFilter); } - return result - }, [cards, search, statusFilter]) + return result; + }, [cards, search, statusFilter]); if (isPending) { return ( @@ -159,40 +176,44 @@ export function Overview({ - ) + ); } if (isError) { return (
-

{error instanceof Error ? error.message : "Unknown error"}

+

+ {error instanceof Error ? error.message : "Unknown error"} +

- ) + ); } if (!data) { return (
-

No telemetry data found. Run some sessions first.

+

+ No telemetry data found. Run some sessions first. +

- ) + ); } - const { overview, skills } = data - const gradedSkills = skills.filter((s) => s.total_checks >= 5) + const { overview, skills } = data; + const gradedSkills = skills.filter((s) => s.total_checks >= 5); const avgPassRate = gradedSkills.length > 0 ? gradedSkills.reduce((sum, s) => sum + s.pass_rate, 0) / gradedSkills.length - : null + : null; const handleSelectProposal = (skillName: string, proposalId: string) => { - navigate(`/skills/${encodeURIComponent(skillName)}?proposal=${encodeURIComponent(proposalId)}`) - } + navigate(`/skills/${encodeURIComponent(skillName)}?proposal=${encodeURIComponent(proposalId)}`); + }; return (
@@ -209,11 +230,20 @@ export function Overview({ />
- ( - - {skill.name} - - )} /> + ( + + {skill.name} + + )} + />
@@ -236,5 +266,5 @@ export function Overview({
- ) + ); } diff --git a/apps/local-dashboard/src/pages/SkillReport.tsx b/apps/local-dashboard/src/pages/SkillReport.tsx index 195a596b..a49b3c22 100644 --- a/apps/local-dashboard/src/pages/SkillReport.tsx +++ b/apps/local-dashboard/src/pages/SkillReport.tsx @@ -1,5 +1,8 @@ -import { useEffect, useState } from "react" -import { Link, useParams, useSearchParams } from "react-router-dom" +import { EvolutionTimeline } from "@selftune/ui/components"; +import { EvidenceViewer } from "@selftune/ui/components"; +import { InfoTip } from "@selftune/ui/components"; +import { STATUS_CONFIG } from "@selftune/ui/lib"; +import { deriveStatus, formatRate, timeAgo } from "@selftune/ui/lib"; import { Badge, Button, @@ -22,14 +25,7 @@ import { Tooltip, TooltipContent, TooltipTrigger, -} from "@selftune/ui/primitives" -import { Skeleton } from "@/components/ui/skeleton" -import { EvolutionTimeline } from "@selftune/ui/components" -import { EvidenceViewer } from "@selftune/ui/components" -import { InfoTip } from "@selftune/ui/components" -import { useSkillReport } from "@/hooks/useSkillReport" -import { STATUS_CONFIG } from "@selftune/ui/lib" -import { deriveStatus, formatRate, timeAgo } from "@selftune/ui/lib" +} from "@selftune/ui/primitives"; import { AlertCircleIcon, ArrowLeftIcon, @@ -45,18 +41,20 @@ import { ClockIcon, AlertOctagonIcon, TargetIcon, - MessageSquareTextIcon, - ServerIcon, - FolderIcon, -} from "lucide-react" +} from "lucide-react"; +import { useEffect, useState } from "react"; +import { Link, useParams, useSearchParams } from "react-router-dom"; + +import { Skeleton } from "@/components/ui/skeleton"; +import { useSkillReport } from "@/hooks/useSkillReport"; function formatDuration(ms: number): string { - if (ms < 1000) return `${Math.round(ms)}ms` - const secs = ms / 1000 - if (secs < 60) return `${secs.toFixed(1)}s` - const mins = secs / 60 - if (mins < 60) return `${mins.toFixed(1)}m` - return `${(mins / 60).toFixed(1)}h` + if (ms < 1000) return 
`${Math.round(ms)}ms`; + const secs = ms / 1000; + if (secs < 60) return `${secs.toFixed(1)}s`; + const mins = secs / 60; + if (mins < 60) return `${mins.toFixed(1)}m`; + return `${(mins / 60).toFixed(1)}h`; } const ACTION_VARIANT: Record = { @@ -65,31 +63,41 @@ const ACTION_VARIANT: Record - defaultExpanded: boolean + timestamp: string | null; + session_id: string | null; + triggered: boolean; + query: string; + invocation_mode: string | null; + confidence: number | null; + tool_name: string | null; + agent_type: string | null; + }>; + defaultExpanded: boolean; }) { - const [expanded, setExpanded] = useState(defaultExpanded) - const ts = meta?.started_at ?? invocations[0]?.timestamp - const modeBreakdown = invocations.reduce((acc, inv) => { - const mode = inv.invocation_mode ?? "unknown" - acc[mode] = (acc[mode] ?? 0) + 1 - return acc - }, {} as Record) + const [expanded, setExpanded] = useState(defaultExpanded); + const ts = meta?.started_at ?? invocations[0]?.timestamp; + const modeBreakdown = invocations.reduce( + (acc, inv) => { + const mode = inv.invocation_mode ?? "unknown"; + acc[mode] = (acc[mode] ?? 0) + 1; + return acc; + }, + {} as Record, + ); return (
@@ -99,16 +107,27 @@ function SessionGroup({ sessionId, meta, invocations, defaultExpanded }: { className="w-full flex items-center gap-3 px-4 py-3 text-left hover:bg-muted/40 active:bg-muted/60 transition-colors" onClick={() => setExpanded(!expanded)} > - +
- {invocations.length} invocation{invocations.length !== 1 ? "s" : ""} + + {invocations.length} invocation{invocations.length !== 1 ? "s" : ""} + {ts ? timeAgo(ts) : ""}
- {meta?.model && {meta.model}} + {meta?.model && ( + + {meta.model} + + )} {meta?.workspace_path && ( - + {meta.workspace_path.split("/").slice(-2).join("/")} )} @@ -124,7 +143,9 @@ function SessionGroup({ sessionId, meta, invocations, defaultExpanded }: { ))}
)} - {sessionId.substring(0, 8)} + + {sessionId.substring(0, 8)} + {/* Invocation table — expanded */} @@ -137,13 +158,16 @@ function SessionGroup({ sessionId, meta, invocations, defaultExpanded }: { Prompt - Mode + Mode{" "} + - Confidence + Confidence{" "} + - Agent + Agent{" "} + Time @@ -151,15 +175,24 @@ function SessionGroup({ sessionId, meta, invocations, defaultExpanded }: { {invocations.map((inv, i) => ( - - {inv.query || No prompt recorded} + + {inv.query || ( + No prompt recorded + )} {!inv.triggered && ( - missed + + missed + )} {inv.invocation_mode ? ( - {inv.invocation_mode} + + {inv.invocation_mode} + ) : ( )} @@ -169,7 +202,9 @@ function SessionGroup({ sessionId, meta, invocations, defaultExpanded }: { {inv.agent_type ? ( - {inv.agent_type} + + {inv.agent_type} + ) : ( )} @@ -184,20 +219,20 @@ function SessionGroup({ sessionId, meta, invocations, defaultExpanded }: {
)}
- ) + ); } export function SkillReport() { - const { name } = useParams<{ name: string }>() - const [searchParams, setSearchParams] = useSearchParams() - const { data, isPending, isError, error, refetch } = useSkillReport(name) + const { name } = useParams<{ name: string }>(); + const [searchParams, setSearchParams] = useSearchParams(); + const { data, isPending, isError, error, refetch } = useSkillReport(name); if (!name) { return (

No skill name provided

- ) + ); } if (isPending) { @@ -212,20 +247,22 @@ export function SkillReport() {
- ) + ); } if (isError) { return (
-

{error instanceof Error ? error.message : "Unknown error"}

+

+ {error instanceof Error ? error.message : "Unknown error"} +

- ) + ); } if (!data) { @@ -233,7 +270,7 @@ export function SkillReport() {

No data yet

- ) + ); } const isNotFound = @@ -244,7 +281,7 @@ export function SkillReport() { data.pending_proposals.length === 0 && (data.canonical_invocations?.length ?? 0) === 0 && (data.prompt_samples?.length ?? 0) === 0 && - (data.session_metadata?.length ?? 0) === 0 + (data.session_metadata?.length ?? 0) === 0; if (isNotFound) { return ( @@ -255,7 +292,7 @@ export function SkillReport() { Back to Overview - ) + ); } const { @@ -266,46 +303,53 @@ export function SkillReport() { canonical_invocations, duration_stats, selftune_stats, - prompt_samples, + prompt_samples: _prompt_samples, session_metadata, - } = data - const status = deriveStatus(usage.pass_rate, usage.total_checks) - const config = STATUS_CONFIG[status] ?? STATUS_CONFIG.UNKNOWN - const passRateGood = status === "HEALTHY" - const hasEvolution = (selftune_stats?.run_count ?? 0) > 0 - const missed = duration_stats?.missed_triggers ?? 0 - - const proposalIds = new Set(evolution.map((entry) => entry.proposal_id)) - const requestedProposal = searchParams.get("proposal") - const activeProposal = requestedProposal && proposalIds.has(requestedProposal) - ? requestedProposal - : (evolution.length > 0 ? evolution[0].proposal_id : null) + } = data; + const status = deriveStatus(usage.pass_rate, usage.total_checks); + const config = STATUS_CONFIG[status] ?? STATUS_CONFIG.UNKNOWN; + const passRateGood = status === "HEALTHY"; + const hasEvolution = (selftune_stats?.run_count ?? 0) > 0; + const missed = duration_stats?.missed_triggers ?? 0; + + const proposalIds = new Set(evolution.map((entry) => entry.proposal_id)); + const requestedProposal = searchParams.get("proposal"); + const activeProposal = + requestedProposal && proposalIds.has(requestedProposal) + ? requestedProposal + : evolution.length > 0 + ? 
evolution[0].proposal_id + : null; useEffect(() => { - const current = searchParams.get("proposal") + const current = searchParams.get("proposal"); if (activeProposal && current !== activeProposal) { - const next = new URLSearchParams(searchParams) - next.set("proposal", activeProposal) - setSearchParams(next, { replace: true }) - return + const next = new URLSearchParams(searchParams); + next.set("proposal", activeProposal); + setSearchParams(next, { replace: true }); + return; } if (!activeProposal && current) { - const next = new URLSearchParams(searchParams) - next.delete("proposal") - setSearchParams(next, { replace: true }) + const next = new URLSearchParams(searchParams); + next.delete("proposal"); + setSearchParams(next, { replace: true }); } - }, [activeProposal, searchParams, setSearchParams]) + }, [activeProposal, searchParams, setSearchParams]); const handleSelectProposal = (proposalId: string) => { - const next = new URLSearchParams(searchParams) - next.set("proposal", proposalId) - setSearchParams(next, { replace: true }) - } + const next = new URLSearchParams(searchParams); + next.set("proposal", proposalId); + setSearchParams(next, { replace: true }); + }; // Unique models/platforms from session metadata - const uniqueModels = [...new Set((session_metadata ?? []).map((s) => s.model).filter(Boolean))] - const uniquePlatforms = [...new Set((session_metadata ?? []).map((s) => s.platform).filter(Boolean))] - const uniqueDirectories = [...new Set((session_metadata ?? []).map((s) => s.workspace_path).filter(Boolean))] + const _uniqueModels = [...new Set((session_metadata ?? []).map((s) => s.model).filter(Boolean))]; + const _uniquePlatforms = [ + ...new Set((session_metadata ?? []).map((s) => s.platform).filter(Boolean)), + ]; + const _uniqueDirectories = [ + ...new Set((session_metadata ?? 
[]).map((s) => s.workspace_path).filter(Boolean)), + ]; // Unified invocations from consolidated skill_invocations table const mergedInvocations = (canonical_invocations ?? []).map((ci) => ({ @@ -318,225 +362,246 @@ export function SkillReport() { confidence: ci.confidence ?? null, tool_name: ci.tool_name ?? null, agent_type: ci.agent_type ?? null, - })) - mergedInvocations.sort((a, b) => (b.timestamp ?? "").localeCompare(a.timestamp ?? "")) + })); + mergedInvocations.sort((a, b) => (b.timestamp ?? "").localeCompare(a.timestamp ?? "")); // Group invocations by session for the grouped view - const sessionMap = new Map() + const sessionMap = new Map(); for (const inv of mergedInvocations) { - const sid = inv.session_id ?? "unknown" - const arr = sessionMap.get(sid) - if (arr) arr.push(inv) - else sessionMap.set(sid, [inv]) + const sid = inv.session_id ?? "unknown"; + const arr = sessionMap.get(sid); + if (arr) arr.push(inv); + else sessionMap.set(sid, [inv]); } - const sessionMetaMap = new Map( - (session_metadata ?? []).map((s) => [s.session_id, s]) - ) + const sessionMetaMap = new Map((session_metadata ?? []).map((s) => [s.session_id, s])); // Sort session groups by most recent invocation - const groupedSessions = [...sessionMap.entries()].sort( - ([, a], [, b]) => (b[0]?.timestamp ?? "").localeCompare(a[0]?.timestamp ?? "") - ) + const groupedSessions = [...sessionMap.entries()].sort(([, a], [, b]) => + (b[0]?.timestamp ?? "").localeCompare(a[0]?.timestamp ?? ""), + ); return ( 0 ? "evidence" : "invocations"}> -
- {/* Skill Header + Tab Bar — sticky, Linear-style compact */} -
-

{data.skill_name}

- - {config.icon} - {config.label} - - - {evolution.length > 0 && ( - - }> - Evidence - {activeProposal && ( - - #{activeProposal.slice(0, 8)} - - )} - - Change history and validation results - - )} - - }> - Invocations - {mergedInvocations.length} - - Recent skill triggers and their outcomes - - {pending_proposals.length > 0 && ( +
+ {/* Skill Header + Tab Bar — sticky, Linear-style compact */} +
+

+ {data.skill_name} +

+ + {config.icon} + {config.label} + + + {evolution.length > 0 && ( + + }> + Evidence + {activeProposal && ( + + #{activeProposal.slice(0, 8)} + + )} + + Change history and validation results + + )} - }> - Pending - {pending_proposals.length} + }> + Invocations + + {mergedInvocations.length} + - Proposals awaiting review + Recent skill triggers and their outcomes - )} - -
+ {pending_proposals.length > 0 && ( + + }> + Pending + + {pending_proposals.length} + + + Proposals awaiting review + + )} + +
- {/* KPIs — 2 rows of 4 */} -
- {/* Row 1: Core metrics */} - - - - - Trigger Rate - - - 0 && !passRateGood ? "text-red-600" : ""}`}> - {usage.total_checks > 0 ? formatRate(usage.pass_rate) : "--"} - - - {usage.total_checks > 0 ? ( - - {passRateGood ? : } - {formatRate(usage.pass_rate)} - - ) : ( - no checks yet - )} - - - - - - - - - Total Checks - - - - {usage.total_checks} - - - - - - - - - Triggered - - - - {usage.triggered_count} - - - - - - - - - Sessions - - - - {data.sessions_with_skill} - - - - - {/* Row 2: Selftune resource metrics */} - - - - - LLM Calls - - - - {hasEvolution ? (selftune_stats?.total_llm_calls ?? 0) : "--"} - - - {hasEvolution ? ( - - {selftune_stats?.run_count ?? 0} evolution runs - - ) : ( - no evolution runs yet - )} - - - - - - - - - Avg Duration - - - - {hasEvolution ? formatDuration(selftune_stats?.avg_elapsed_ms ?? 0) : "--"} - - - {hasEvolution ? ( - - {formatDuration(selftune_stats?.total_elapsed_ms ?? 0)} total - - ) : ( - no evolution runs yet - )} - - - - - - - - - Missed Triggers - - - 0 ? "text-amber-600" : ""}`}> - {missed} - - - - - - - - - Avg Confidence - - - - {(() => { - const withConfidence = mergedInvocations.filter((i) => i.confidence !== null); - return withConfidence.length > 0 - ? formatRate(withConfidence.reduce((sum, i) => sum + (i.confidence ?? 0), 0) / withConfidence.length) - : "--"; - })()} - - - -
+ {/* KPIs — 2 rows of 4 */} +
+ {/* Row 1: Core metrics */} + + + + + Trigger Rate + + + 0 && !passRateGood ? "text-red-600" : ""}`} + > + {usage.total_checks > 0 ? formatRate(usage.pass_rate) : "--"} + + + {usage.total_checks > 0 ? ( + + {passRateGood ? ( + + ) : ( + + )} + {formatRate(usage.pass_rate)} + + ) : ( + + no checks yet + + )} + + + + + + + + + Total Checks + + + + {usage.total_checks} + + + + + + + + + Triggered + + + + {usage.triggered_count} + + + + + + + + + Sessions + + + + {data.sessions_with_skill} + + + + + {/* Row 2: Selftune resource metrics */} + + + + + LLM Calls + + + + {hasEvolution ? (selftune_stats?.total_llm_calls ?? 0) : "--"} + + + {hasEvolution ? ( + + {selftune_stats?.run_count ?? 0} evolution runs + + ) : ( + + no evolution runs yet + + )} + + + + + + + + + Avg Duration + + + + {hasEvolution ? formatDuration(selftune_stats?.avg_elapsed_ms ?? 0) : "--"} + + + {hasEvolution ? ( + + {formatDuration(selftune_stats?.total_elapsed_ms ?? 0)} total + + ) : ( + + no evolution runs yet + + )} + + + + + + + + + Missed Triggers + + + 0 ? "text-amber-600" : ""}`} + > + {missed} + + + + + + + + + Avg Confidence + + + + {(() => { + const withConfidence = mergedInvocations.filter((i) => i.confidence !== null); + return withConfidence.length > 0 + ? formatRate( + withConfidence.reduce((sum, i) => sum + (i.confidence ?? 0), 0) / + withConfidence.length, + ) + : "--"; + })()} + + + +
- {/* Main content: sidebar timeline + tabbed detail */} -
- {/* Left sidebar: Evolution Timeline — sticky so it stays visible while scrolling */} - {evolution.length > 0 && ( - - )} + {/* Main content: sidebar timeline + tabbed detail */} +
+ {/* Left sidebar: Evolution Timeline — sticky so it stays visible while scrolling */} + {evolution.length > 0 && ( + + )} - {/* Right content area */} -
+ {/* Right content area */} +
{/* Evidence tab */} {evolution.length > 0 && ( @@ -548,7 +613,9 @@ export function SkillReport() { /> ) : (
-

Select a proposal from the timeline

+

+ Select a proposal from the timeline +

)}
@@ -559,22 +626,42 @@ export function SkillReport() { {mergedInvocations.length === 0 ? ( -

No invocation records yet.

+

+ No invocation records yet. +

) : (
{/* Legend */}
- {mergedInvocations.length} invocations across {groupedSessions.length} sessions + + {mergedInvocations.length} invocations across {groupedSessions.length}{" "} + sessions +
- explicit user typed /skill - implicit mentioned by name - inferred agent chose autonomously + + + explicit + {" "} + user typed /skill + + + + implicit + {" "} + mentioned by name + + + + inferred + {" "} + agent chose autonomously +
{groupedSessions.map(([sessionId, invocations], idx) => { - const meta = sessionMetaMap.get(sessionId) + const meta = sessionMetaMap.get(sessionId); return ( - ) + ); })}
)} - {/* Pending tab */} {pending_proposals.length > 0 && ( @@ -609,10 +695,15 @@ export function SkillReport() {
- + {p.action} - {timeAgo(p.timestamp)} + + {timeAgo(p.timestamp)} + #{p.proposal_id.slice(0, 8)} @@ -625,9 +716,9 @@ export function SkillReport() { )} +
-
- ) + ); } diff --git a/apps/local-dashboard/src/pages/Status.tsx b/apps/local-dashboard/src/pages/Status.tsx index 4958292f..b9904f07 100644 --- a/apps/local-dashboard/src/pages/Status.tsx +++ b/apps/local-dashboard/src/pages/Status.tsx @@ -1,3 +1,5 @@ +import { InfoTip } from "@selftune/ui/components"; +import { timeAgo } from "@selftune/ui/lib"; import { Badge, Button, @@ -7,12 +9,7 @@ import { CardDescription, CardHeader, CardTitle, -} from "@selftune/ui/primitives" -import { Skeleton } from "@/components/ui/skeleton" -import { InfoTip } from "@selftune/ui/components" -import { useDoctor } from "@/hooks/useDoctor" -import { timeAgo } from "@selftune/ui/lib" -import type { HealthCheck, HealthStatus } from "@/types" +} from "@selftune/ui/primitives"; import { AlertCircleIcon, AlertTriangleIcon, @@ -25,9 +22,20 @@ import { SettingsIcon, ShieldCheckIcon, XCircleIcon, -} from "lucide-react" +} from "lucide-react"; -const STATUS_DISPLAY: Record = { +import { Skeleton } from "@/components/ui/skeleton"; +import { useDoctor } from "@/hooks/useDoctor"; +import type { HealthCheck, HealthStatus } from "@/types"; + +const STATUS_DISPLAY: Record< + HealthStatus, + { + icon: React.ReactNode; + variant: "default" | "secondary" | "destructive" | "outline"; + label: string; + } +> = { pass: { icon: , variant: "outline", @@ -43,7 +51,7 @@ const STATUS_DISPLAY: Record = { config: { @@ -87,19 +95,19 @@ const CHECK_META: Record, }, -} +}; function CheckCard({ check }: { check: HealthCheck }) { const meta = CHECK_META[check.name] ?? { label: check.name, description: "", icon: , - } + }; const display = STATUS_DISPLAY[check.status] ?? 
{ icon: , variant: "outline" as const, label: check.status, - } + }; return ( @@ -109,9 +117,7 @@ function CheckCard({ check }: { check: HealthCheck }) { {meta.label} {meta.description && } - - {check.message || "No details"} - + {check.message || "No details"} {display.icon} @@ -125,11 +131,11 @@ function CheckCard({ check }: { check: HealthCheck }) { )} - ) + ); } export function Status() { - const { data, isPending, isError, error, refetch } = useDoctor() + const { data, isPending, isError, error, refetch } = useDoctor(); if (isPending) { return ( @@ -141,20 +147,22 @@ export function Status() { ))}
- ) + ); } if (isError) { return (
-

{error instanceof Error ? error.message : "Unknown error"}

+

+ {error instanceof Error ? error.message : "Unknown error"} +

- ) + ); } if (!data) { @@ -162,28 +170,28 @@ export function Status() {

No diagnostics data available.

- ) + ); } - const { checks: rawChecks, summary: rawSummary, healthy = false, timestamp } = data - const checks = rawChecks ?? [] - const summary = rawSummary ?? { pass: 0, warn: 0, fail: 0 } - const freshnessCheck = checks.find((c) => c.name === "dashboard_freshness_mode") + const { checks: rawChecks, summary: rawSummary, healthy = false, timestamp } = data; + const checks = rawChecks ?? []; + const summary = rawSummary ?? { pass: 0, warn: 0, fail: 0 }; + const freshnessCheck = checks.find((c) => c.name === "dashboard_freshness_mode"); // Group checks by category - const configChecks = checks.filter((c) => c.name === "config") - const logChecks = checks.filter((c) => c.name.startsWith("log_")) - const hookChecks = checks.filter((c) => c.name === "hook_settings") - const evolutionChecks = checks.filter((c) => c.name === "evolution_audit") - const integrityChecks = checks.filter((c) => c.name === "dashboard_freshness_mode") + const configChecks = checks.filter((c) => c.name === "config"); + const logChecks = checks.filter((c) => c.name.startsWith("log_")); + const hookChecks = checks.filter((c) => c.name === "hook_settings"); + const evolutionChecks = checks.filter((c) => c.name === "evolution_audit"); + const integrityChecks = checks.filter((c) => c.name === "dashboard_freshness_mode"); const knownNames = new Set([ "config", ...logChecks.map((c) => c.name), "hook_settings", "evolution_audit", "dashboard_freshness_mode", - ]) - const otherChecks = checks.filter((c) => !knownNames.has(c.name)) + ]); + const otherChecks = checks.filter((c) => !knownNames.has(c.name)); const groups = [ { title: "Configuration", checks: configChecks }, @@ -192,7 +200,7 @@ export function Status() { { title: "Evolution", checks: evolutionChecks }, { title: "Integrity", checks: integrityChecks }, { title: "Other", checks: otherChecks }, - ].filter((g) => g.checks.length > 0) + ].filter((g) => g.checks.length > 0); return (
@@ -211,7 +219,14 @@ export function Status() { Last checked {timestamp ? timeAgo(timestamp) : "—"} -
@@ -249,7 +264,9 @@ export function Status() { Warnings - 0 ? "text-amber-500" : ""}`}> + 0 ? "text-amber-500" : ""}`} + > {summary.warn} @@ -260,7 +277,9 @@ export function Status() { Failed - 0 ? "text-red-600" : ""}`}> + 0 ? "text-red-600" : ""}`} + > {summary.fail} @@ -279,5 +298,5 @@ export function Status() {
))}
- ) + ); } diff --git a/apps/local-dashboard/src/styles.css b/apps/local-dashboard/src/styles.css index 3852a807..ab122aa8 100644 --- a/apps/local-dashboard/src/styles.css +++ b/apps/local-dashboard/src/styles.css @@ -79,7 +79,7 @@ } @theme inline { - --font-sans: 'Geist Variable', sans-serif; + --font-sans: "Geist Variable", sans-serif; --color-background: var(--background); --color-foreground: var(--foreground); --color-card: var(--card); @@ -152,10 +152,18 @@ margin-bottom: 0.5em; line-height: 1.3; } -.skill-markdown h1 { font-size: 1.125rem; } -.skill-markdown h2 { font-size: 1rem; } -.skill-markdown h3 { font-size: 0.875rem; } -.skill-markdown h4 { font-size: 0.8125rem; } +.skill-markdown h1 { + font-size: 1.125rem; +} +.skill-markdown h2 { + font-size: 1rem; +} +.skill-markdown h3 { + font-size: 0.875rem; +} +.skill-markdown h4 { + font-size: 0.8125rem; +} .skill-markdown p { margin-top: 0.5em; margin-bottom: 0.5em; @@ -170,8 +178,12 @@ margin-top: 0.25em; margin-bottom: 0.25em; } -.skill-markdown ul { list-style-type: disc; } -.skill-markdown ol { list-style-type: decimal; } +.skill-markdown ul { + list-style-type: disc; +} +.skill-markdown ol { + list-style-type: decimal; +} .skill-markdown code { font-size: 0.75rem; background: var(--muted); @@ -198,8 +210,12 @@ margin-top: 0.75em; margin-bottom: 0.75em; } -.skill-markdown strong { font-weight: 600; } -.skill-markdown em { font-style: italic; } +.skill-markdown strong { + font-weight: 600; +} +.skill-markdown em { + font-style: italic; +} .skill-markdown hr { border: none; border-top: 1px solid var(--border); diff --git a/apps/local-dashboard/vite.config.ts b/apps/local-dashboard/vite.config.ts index 8369e636..d839747a 100644 --- a/apps/local-dashboard/vite.config.ts +++ b/apps/local-dashboard/vite.config.ts @@ -1,4 +1,5 @@ import { fileURLToPath } from "node:url"; + import tailwindcss from "@tailwindcss/vite"; import react from "@vitejs/plugin-react"; import { defineConfig } from "vitest/config"; diff 
--git a/bin/selftune.cjs b/bin/selftune.cjs index b6589253..18d62ac1 100755 --- a/bin/selftune.cjs +++ b/bin/selftune.cjs @@ -26,6 +26,6 @@ for (const [cmd, args] of runners) { console.error( JSON.stringify({ error: "No TypeScript runtime found. Install bun (https://bun.sh) or tsx (npx tsx).", - }) + }), ); process.exit(1); diff --git a/biome.json b/biome.json deleted file mode 100644 index 7bf6d70a..00000000 --- a/biome.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "$schema": "https://biomejs.dev/schemas/2.4.8/schema.json", - "assist": { "actions": { "source": { "organizeImports": "on" } } }, - "linter": { - "enabled": true, - "rules": { - "recommended": true - } - }, - "formatter": { - "indentStyle": "space", - "indentWidth": 2, - "lineWidth": 100 - }, - "files": { - "includes": [ - "**/cli/**/*.ts", - "**/tests/**/*.ts", - "**/*.ts", - "**/*.json", - "**/*.md", - "!**/.agent/skills", - "!**/.claude/skills", - "!**/.claude/worktrees", - "!**/test-results", - "!**/node_modules", - "!**/bun.lock" - ] - } -} diff --git a/bun.lock b/bun.lock index 1de894f3..1c20fd75 100644 --- a/bun.lock +++ b/bun.lock @@ -8,8 +8,9 @@ "@selftune/telemetry-contract": "file:packages/telemetry-contract", }, "devDependencies": { - "@biomejs/biome": "^2.4.7", "@types/bun": "^1.1.0", + "oxfmt": "^0.41.0", + "oxlint": "^1.56.0", }, }, "apps/local-dashboard": { @@ -164,24 +165,6 @@ "@base-ui/utils": ["@base-ui/utils@0.2.6", "", { "dependencies": { "@babel/runtime": "^7.28.6", "@floating-ui/utils": "^0.2.11", "reselect": "^5.1.1", "use-sync-external-store": "^1.6.0" }, "peerDependencies": { "@types/react": "^17 || ^18 || ^19", "react": "^17 || ^18 || ^19", "react-dom": "^17 || ^18 || ^19" }, "optionalPeers": ["@types/react"] }, "sha512-yQ+qeuqohwhsNpoYDqqXaLllYAkPCP4vYdDrVo8FQXaAPfHWm1pG/Vm+jmGTA5JFS0BAIjookyapuJFY8F9PIw=="], - "@biomejs/biome": ["@biomejs/biome@2.4.8", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "2.4.8", "@biomejs/cli-darwin-x64": "2.4.8", 
"@biomejs/cli-linux-arm64": "2.4.8", "@biomejs/cli-linux-arm64-musl": "2.4.8", "@biomejs/cli-linux-x64": "2.4.8", "@biomejs/cli-linux-x64-musl": "2.4.8", "@biomejs/cli-win32-arm64": "2.4.8", "@biomejs/cli-win32-x64": "2.4.8" }, "bin": { "biome": "bin/biome" } }, "sha512-ponn0oKOky1oRXBV+rlSaUlixUxf1aZvWC19Z41zBfUOUesthrQqL3OtiAlSB1EjFjyWpn98Q64DHelhA6jNlA=="], - - "@biomejs/cli-darwin-arm64": ["@biomejs/cli-darwin-arm64@2.4.8", "", { "os": "darwin", "cpu": "arm64" }, "sha512-ARx0tECE8I7S2C2yjnWYLNbBdDoPdq3oyNLhMglmuctThwUsuzFWRKrHmIGwIRWKz0Mat9DuzLEDp52hGnrxGQ=="], - - "@biomejs/cli-darwin-x64": ["@biomejs/cli-darwin-x64@2.4.8", "", { "os": "darwin", "cpu": "x64" }, "sha512-Jg9/PsB9vDCJlANE8uhG7qDhb5w0Ix69D7XIIc8IfZPUoiPrbLm33k2Ig3NOJ/7nb3UbesFz3D1aDKm9DvzjhQ=="], - - "@biomejs/cli-linux-arm64": ["@biomejs/cli-linux-arm64@2.4.8", "", { "os": "linux", "cpu": "arm64" }, "sha512-5CdrsJct76XG2hpKFwXnEtlT1p+4g4yV+XvvwBpzKsTNLO9c6iLlAxwcae2BJ7ekPGWjNGw9j09T5KGPKKxQig=="], - - "@biomejs/cli-linux-arm64-musl": ["@biomejs/cli-linux-arm64-musl@2.4.8", "", { "os": "linux", "cpu": "arm64" }, "sha512-Zo9OhBQDJ3IBGPlqHiTISloo5H0+FBIpemqIJdW/0edJ+gEcLR+MZeZozcUyz3o1nXkVA7++DdRKQT0599j9jA=="], - - "@biomejs/cli-linux-x64": ["@biomejs/cli-linux-x64@2.4.8", "", { "os": "linux", "cpu": "x64" }, "sha512-PdKXspVEaMCQLjtZCn6vfSck/li4KX9KGwSDbZdgIqlrizJ2MnMcE3TvHa2tVfXNmbjMikzcfJpuPWH695yJrw=="], - - "@biomejs/cli-linux-x64-musl": ["@biomejs/cli-linux-x64-musl@2.4.8", "", { "os": "linux", "cpu": "x64" }, "sha512-Gi8quv8MEuDdKaPFtS2XjEnMqODPsRg6POT6KhoP+VrkNb+T2ywunVB+TvOU0LX1jAZzfBr+3V1mIbBhzAMKvw=="], - - "@biomejs/cli-win32-arm64": ["@biomejs/cli-win32-arm64@2.4.8", "", { "os": "win32", "cpu": "arm64" }, "sha512-LoFatS0tnHv6KkCVpIy3qZCih+MxUMvdYiPWLHRri7mhi2vyOOs8OrbZBcLTUEWCS+ktO72nZMy4F96oMhkOHQ=="], - - "@biomejs/cli-win32-x64": ["@biomejs/cli-win32-x64@2.4.8", "", { "os": "win32", "cpu": "x64" }, 
"sha512-vAn7iXDoUbqFXqVocuq1sMYAd33p8+mmurqJkWl6CtIhobd/O6moe4rY5AJvzbunn/qZCdiDVcveqtkFh1e7Hg=="], - "@dnd-kit/accessibility": ["@dnd-kit/accessibility@3.1.1", "", { "dependencies": { "tslib": "^2.0.0" }, "peerDependencies": { "react": ">=16.8.0" } }, "sha512-2P+YgaXF+gRsIihwwY1gCsQSYnu9Zyj2py8kY5fFvUM1qm2WA2u639R6YNVfU4GWr+ZM5mqEsfHZZLoRONbemw=="], "@dnd-kit/core": ["@dnd-kit/core@6.3.1", "", { "dependencies": { "@dnd-kit/accessibility": "^3.1.1", "@dnd-kit/utilities": "^3.2.2", "tslib": "^2.0.0" }, "peerDependencies": { "react": ">=16.8.0", "react-dom": ">=16.8.0" } }, "sha512-xkGBRQQab4RLwgXxoqETICr6S5JlogafbhNsidmrkVv2YRs5MLwpjoF2qpiGjQt8S9AoxtIV603s0GIUpY5eYQ=="], @@ -192,7 +175,7 @@ "@dnd-kit/utilities": ["@dnd-kit/utilities@3.2.2", "", { "dependencies": { "tslib": "^2.0.0" }, "peerDependencies": { "react": ">=16.8.0" } }, "sha512-+MKAJEOfaBe5SmV6t34p80MMKhjvUz0vRrvVJbPT0WElzaOJ/1xs+D+KDv+tD/NE5ujfrChEcshd4fLn0wpiqg=="], - "@dotenvx/dotenvx": ["@dotenvx/dotenvx@1.57.0", "", { "dependencies": { "commander": "^11.1.0", "dotenv": "^17.2.1", "eciesjs": "^0.4.10", "execa": "^5.1.1", "fdir": "^6.2.0", "ignore": "^5.3.0", "object-treeify": "1.1.33", "picomatch": "^4.0.2", "which": "^4.0.0" }, "bin": { "dotenvx": "src/cli/dotenvx.js" } }, "sha512-WsTEcqfHzKmLFZh3jLGd7o4iCkrIupp+qFH2FJUJtQXUh2GcOnLXD00DcrhlO4H8QSmaKnW9lugOEbrdpu25kA=="], + "@dotenvx/dotenvx": ["@dotenvx/dotenvx@1.57.1", "", { "dependencies": { "commander": "^11.1.0", "dotenv": "^17.2.1", "eciesjs": "^0.4.10", "execa": "^5.1.1", "fdir": "^6.2.0", "ignore": "^5.3.0", "object-treeify": "1.1.33", "picomatch": "^4.0.2", "which": "^4.0.0" }, "bin": { "dotenvx": "src/cli/dotenvx.js" } }, "sha512-iKXuo8Nes9Ft4zF3AZOT4FHkl6OV8bHqn61a67qHokkBzSEurnKZAlOkT0FYrRNVGvE6nCfZMtYswyjfXCR1MQ=="], "@ecies/ciphers": ["@ecies/ciphers@0.2.5", "", { "peerDependencies": { "@noble/ciphers": "^1.0.0" } }, "sha512-GalEZH4JgOMHYYcYmVqnFirFsjZHeoGMDt9IxEnM9F7GRUUyUksJ7Ou53L83WHJq3RWKD3AcBpo0iQh0oMpf8A=="], @@ -302,6 +285,82 @@ 
"@open-draft/until": ["@open-draft/until@2.1.0", "", {}, "sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg=="], + "@oxfmt/binding-android-arm-eabi": ["@oxfmt/binding-android-arm-eabi@0.41.0", "", { "os": "android", "cpu": "arm" }, "sha512-REfrqeMKGkfMP+m/ScX4f5jJBSmVNYcpoDF8vP8f8eYPDuPGZmzp56NIUsYmx3h7f6NzC6cE3gqh8GDWrJHCKw=="], + + "@oxfmt/binding-android-arm64": ["@oxfmt/binding-android-arm64@0.41.0", "", { "os": "android", "cpu": "arm64" }, "sha512-s0b1dxNgb2KomspFV2LfogC2XtSJB42POXF4bMCLJyvQmAGos4ZtjGPfQreToQEaY0FQFjz3030ggI36rF1q5g=="], + + "@oxfmt/binding-darwin-arm64": ["@oxfmt/binding-darwin-arm64@0.41.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-EGXGualADbv/ZmamE7/2DbsrYmjoPlAmHEpTL4vapLF4EfVD6fr8/uQDFnPJkUBjiSWFJZtFNsGeN1B6V3owmA=="], + + "@oxfmt/binding-darwin-x64": ["@oxfmt/binding-darwin-x64@0.41.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-WxySJEvdQQYMmyvISH3qDpTvoS0ebnIP63IMxLLWowJyPp/AAH0hdWtlo+iGNK5y3eVfa5jZguwNaQkDKWpGSw=="], + + "@oxfmt/binding-freebsd-x64": ["@oxfmt/binding-freebsd-x64@0.41.0", "", { "os": "freebsd", "cpu": "x64" }, "sha512-Y2kzMkv3U3oyuYaR4wTfGjOTYTXiFC/hXmG0yVASKkbh02BJkvD98Ij8bIevr45hNZ0DmZEgqiXF+9buD4yMYQ=="], + + "@oxfmt/binding-linux-arm-gnueabihf": ["@oxfmt/binding-linux-arm-gnueabihf@0.41.0", "", { "os": "linux", "cpu": "arm" }, "sha512-ptazDjdUyhket01IjPTT6ULS1KFuBfTUU97osTP96X5y/0oso+AgAaJzuH81oP0+XXyrWIHbRzozSAuQm4p48g=="], + + "@oxfmt/binding-linux-arm-musleabihf": ["@oxfmt/binding-linux-arm-musleabihf@0.41.0", "", { "os": "linux", "cpu": "arm" }, "sha512-UkoL2OKxFD+56bPEBcdGn+4juTW4HRv/T6w1dIDLnvKKWr6DbarB/mtHXlADKlFiJubJz8pRkttOR7qjYR6lTA=="], + + "@oxfmt/binding-linux-arm64-gnu": ["@oxfmt/binding-linux-arm64-gnu@0.41.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-gofu0PuumSOHYczD8p62CPY4UF6ee+rSLZJdUXkpwxg6pILiwSDBIouPskjF/5nF3A7QZTz2O9KFNkNxxFN9tA=="], + + "@oxfmt/binding-linux-arm64-musl": ["@oxfmt/binding-linux-arm64-musl@0.41.0", "", { "os": "linux", 
"cpu": "arm64" }, "sha512-VfVZxL0+6RU86T8F8vKiDBa+iHsr8PAjQmKGBzSCAX70b6x+UOMFl+2dNihmKmUwqkCazCPfYjt6SuAPOeQJ3g=="], + + "@oxfmt/binding-linux-ppc64-gnu": ["@oxfmt/binding-linux-ppc64-gnu@0.41.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-bwzokz2eGvdfJbc0i+zXMJ4BBjQPqg13jyWpEEZDOrBCQ91r8KeY2Mi2kUeuMTZNFXju+jcAbAbpyJxRGla0eg=="], + + "@oxfmt/binding-linux-riscv64-gnu": ["@oxfmt/binding-linux-riscv64-gnu@0.41.0", "", { "os": "linux", "cpu": "none" }, "sha512-POLM//PCH9uqDeNDwWL3b3DkMmI3oI2cU6hwc2lnztD1o7dzrQs3R9nq555BZ6wI7t2lyhT9CS+CRaz5X0XqLA=="], + + "@oxfmt/binding-linux-riscv64-musl": ["@oxfmt/binding-linux-riscv64-musl@0.41.0", "", { "os": "linux", "cpu": "none" }, "sha512-NNK7PzhFqLUwx/G12Xtm6scGv7UITvyGdAR5Y+TlqsG+essnuRWR4jRNODWRjzLZod0T3SayRbnkSIWMBov33w=="], + + "@oxfmt/binding-linux-s390x-gnu": ["@oxfmt/binding-linux-s390x-gnu@0.41.0", "", { "os": "linux", "cpu": "s390x" }, "sha512-qVf/zDC5cN9eKe4qI/O/m445er1IRl6swsSl7jHkqmOSVfknwCe5JXitYjZca+V/cNJSU/xPlC5EFMabMMFDpw=="], + + "@oxfmt/binding-linux-x64-gnu": ["@oxfmt/binding-linux-x64-gnu@0.41.0", "", { "os": "linux", "cpu": "x64" }, "sha512-ojxYWu7vUb6ysYqVCPHuAPVZHAI40gfZ0PDtZAMwVmh2f0V8ExpPIKoAKr7/8sNbAXJBBpZhs2coypIo2jJX4w=="], + + "@oxfmt/binding-linux-x64-musl": ["@oxfmt/binding-linux-x64-musl@0.41.0", "", { "os": "linux", "cpu": "x64" }, "sha512-O2exZLBxoCMIv2vlvcbkdedazJPTdG0VSup+0QUCfYQtx751zCZNboX2ZUOiQ/gDTdhtXvSiot0h6GEGkOyalA=="], + + "@oxfmt/binding-openharmony-arm64": ["@oxfmt/binding-openharmony-arm64@0.41.0", "", { "os": "none", "cpu": "arm64" }, "sha512-N+31/VoL+z+NNBt8viy3I4NaIdPbiYeOnB884LKqvXldaE2dRztdPv3q5ipfZYv0RwFp7JfqS4I27K/DSHCakg=="], + + "@oxfmt/binding-win32-arm64-msvc": ["@oxfmt/binding-win32-arm64-msvc@0.41.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-Z7NAtu/RN8kjCQ1y5oDD0nTAeRswh3GJ93qwcW51srmidP7XPBmZbLlwERu1W5veCevQJtPS9xmkpcDTYsGIwQ=="], + + "@oxfmt/binding-win32-ia32-msvc": ["@oxfmt/binding-win32-ia32-msvc@0.41.0", "", { "os": "win32", "cpu": "ia32" }, 
"sha512-uNxxP3l4bJ6VyzIeRqCmBU2Q0SkCFgIhvx9/9dJ9V8t/v+jP1IBsuaLwCXGR8JPHtkj4tFp+RHtUmU2ZYAUpMA=="], + + "@oxfmt/binding-win32-x64-msvc": ["@oxfmt/binding-win32-x64-msvc@0.41.0", "", { "os": "win32", "cpu": "x64" }, "sha512-49ZSpbZ1noozyPapE8SUOSm3IN0Ze4b5nkO+4+7fq6oEYQQJFhE0saj5k/Gg4oewVPdjn0L3ZFeWk2Vehjcw7A=="], + + "@oxlint/binding-android-arm-eabi": ["@oxlint/binding-android-arm-eabi@1.56.0", "", { "os": "android", "cpu": "arm" }, "sha512-IyfYPthZyiSKwAv/dLjeO18SaK8MxLI9Yss2JrRDyweQAkuL3LhEy7pwIwI7uA3KQc1Vdn20kdmj3q0oUIQL6A=="], + + "@oxlint/binding-android-arm64": ["@oxlint/binding-android-arm64@1.56.0", "", { "os": "android", "cpu": "arm64" }, "sha512-Ga5zYrzH6vc/VFxhn6MmyUnYEfy9vRpwTIks99mY3j6Nz30yYpIkWryI0QKPCgvGUtDSXVLEaMum5nA+WrNOSg=="], + + "@oxlint/binding-darwin-arm64": ["@oxlint/binding-darwin-arm64@1.56.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-ogmbdJysnw/D4bDcpf1sPLpFThZ48lYp4aKYm10Z/6Nh1SON6NtnNhTNOlhEY296tDFItsZUz+2tgcSYqh8Eyw=="], + + "@oxlint/binding-darwin-x64": ["@oxlint/binding-darwin-x64@1.56.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-x8QE1h+RAtQ2g+3KPsP6Fk/tdz6zJQUv5c7fTrJxXV3GHOo+Ry5p/PsogU4U+iUZg0rj6hS+E4xi+mnwwlDCWQ=="], + + "@oxlint/binding-freebsd-x64": ["@oxlint/binding-freebsd-x64@1.56.0", "", { "os": "freebsd", "cpu": "x64" }, "sha512-6G+WMZvwJpMvY7my+/SHEjb7BTk/PFbePqLpmVmUJRIsJMy/UlyYqjpuh0RCgYYkPLcnXm1rUM04kbTk8yS1Yg=="], + + "@oxlint/binding-linux-arm-gnueabihf": ["@oxlint/binding-linux-arm-gnueabihf@1.56.0", "", { "os": "linux", "cpu": "arm" }, "sha512-YYHBsk/sl7fYwQOok+6W5lBPeUEvisznV/HZD2IfZmF3Bns6cPC3Z0vCtSEOaAWTjYWN3jVsdu55jMxKlsdlhg=="], + + "@oxlint/binding-linux-arm-musleabihf": ["@oxlint/binding-linux-arm-musleabihf@1.56.0", "", { "os": "linux", "cpu": "arm" }, "sha512-+AZK8rOUr78y8WT6XkDb04IbMRqauNV+vgT6f8ZLOH8wnpQ9i7Nol0XLxAu+Cq7Sb+J9wC0j6Km5hG8rj47/yQ=="], + + "@oxlint/binding-linux-arm64-gnu": ["@oxlint/binding-linux-arm64-gnu@1.56.0", "", { "os": "linux", "cpu": "arm64" }, 
"sha512-urse2SnugwJRojUkGSSeH2LPMaje5Q50yQtvtL9HFckiyeqXzoFwOAZqD5TR29R2lq7UHidfFDM9EGcchcbb8A=="], + + "@oxlint/binding-linux-arm64-musl": ["@oxlint/binding-linux-arm64-musl@1.56.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-rkTZkBfJ4TYLjansjSzL6mgZOdN5IvUnSq3oNJSLwBcNvy3dlgQtpHPrRxrCEbbcp7oQ6If0tkNaqfOsphYZ9g=="], + + "@oxlint/binding-linux-ppc64-gnu": ["@oxlint/binding-linux-ppc64-gnu@1.56.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-uqL1kMH3u69/e1CH2EJhP3CP28jw2ExLsku4o8RVAZ7fySo9zOyI2fy9pVlTAp4voBLVgzndXi3SgtdyCTa2aA=="], + + "@oxlint/binding-linux-riscv64-gnu": ["@oxlint/binding-linux-riscv64-gnu@1.56.0", "", { "os": "linux", "cpu": "none" }, "sha512-j0CcMBOgV6KsRaBdsebIeiy7hCjEvq2KdEsiULf2LZqAq0v1M1lWjelhCV57LxsqaIGChXFuFJ0RiFrSRHPhSg=="], + + "@oxlint/binding-linux-riscv64-musl": ["@oxlint/binding-linux-riscv64-musl@1.56.0", "", { "os": "linux", "cpu": "none" }, "sha512-7VDOiL8cDG3DQ/CY3yKjbV1c4YPvc4vH8qW09Vv+5ukq3l/Kcyr6XGCd5NvxUmxqDb2vjMpM+eW/4JrEEsUetA=="], + + "@oxlint/binding-linux-s390x-gnu": ["@oxlint/binding-linux-s390x-gnu@1.56.0", "", { "os": "linux", "cpu": "s390x" }, "sha512-JGRpX0M+ikD3WpwJ7vKcHKV6Kg0dT52BW2Eu2BupXotYeqGXBrbY+QPkAyKO6MNgKozyTNaRh3r7g+VWgyAQYQ=="], + + "@oxlint/binding-linux-x64-gnu": ["@oxlint/binding-linux-x64-gnu@1.56.0", "", { "os": "linux", "cpu": "x64" }, "sha512-dNaICPvtmuxFP/VbqdofrLqdS3bM/AKJN3LMJD52si44ea7Be1cBk6NpfIahaysG9Uo+L98QKddU9CD5L8UHnQ=="], + + "@oxlint/binding-linux-x64-musl": ["@oxlint/binding-linux-x64-musl@1.56.0", "", { "os": "linux", "cpu": "x64" }, "sha512-pF1vOtM+GuXmbklM1hV8WMsn6tCNPvkUzklj/Ej98JhlanbmA2RB1BILgOpwSuCTRTIYx2MXssmEyQQ90QF5aA=="], + + "@oxlint/binding-openharmony-arm64": ["@oxlint/binding-openharmony-arm64@1.56.0", "", { "os": "none", "cpu": "arm64" }, "sha512-bp8NQ4RE6fDIFLa4bdBiOA+TAvkNkg+rslR+AvvjlLTYXLy9/uKAYLQudaQouWihLD/hgkrXIKKzXi5IXOewwg=="], + + "@oxlint/binding-win32-arm64-msvc": ["@oxlint/binding-win32-arm64-msvc@1.56.0", "", { "os": "win32", "cpu": "arm64" }, 
"sha512-PxT4OJDfMOQBzo3OlzFb9gkoSD+n8qSBxyVq2wQSZIHFQYGEqIRTo9M0ZStvZm5fdhMqaVYpOnJvH2hUMEDk/g=="], + + "@oxlint/binding-win32-ia32-msvc": ["@oxlint/binding-win32-ia32-msvc@1.56.0", "", { "os": "win32", "cpu": "ia32" }, "sha512-PTRy6sIEPqy2x8PTP1baBNReN/BNEFmde0L+mYeHmjXE1Vlcc9+I5nsqENsB2yAm5wLkzPoTNCMY/7AnabT4/A=="], + + "@oxlint/binding-win32-x64-msvc": ["@oxlint/binding-win32-x64-msvc@1.56.0", "", { "os": "win32", "cpu": "x64" }, "sha512-ZHa0clocjLmIDr+1LwoWtxRcoYniAvERotvwKUYKhH41NVfl0Y4LNbyQkwMZzwDvKklKGvGZ5+DAG58/Ik47tQ=="], + "@radix-ui/primitive": ["@radix-ui/primitive@1.1.3", "", {}, "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg=="], "@radix-ui/react-compose-refs": ["@radix-ui/react-compose-refs@1.1.2", "", { "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg=="], @@ -338,60 +397,62 @@ "@rolldown/pluginutils": ["@rolldown/pluginutils@1.0.0-beta.27", "", {}, "sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA=="], - "@rollup/rollup-android-arm-eabi": ["@rollup/rollup-android-arm-eabi@4.59.0", "", { "os": "android", "cpu": "arm" }, "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg=="], + "@rollup/rollup-android-arm-eabi": ["@rollup/rollup-android-arm-eabi@4.60.0", "", { "os": "android", "cpu": "arm" }, "sha512-WOhNW9K8bR3kf4zLxbfg6Pxu2ybOUbB2AjMDHSQx86LIF4rH4Ft7vmMwNt0loO0eonglSNy4cpD3MKXXKQu0/A=="], - "@rollup/rollup-android-arm64": ["@rollup/rollup-android-arm64@4.59.0", "", { "os": "android", "cpu": "arm64" }, "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q=="], + "@rollup/rollup-android-arm64": ["@rollup/rollup-android-arm64@4.60.0", "", { "os": "android", "cpu": "arm64" }, 
"sha512-u6JHLll5QKRvjciE78bQXDmqRqNs5M/3GVqZeMwvmjaNODJih/WIrJlFVEihvV0MiYFmd+ZyPr9wxOVbPAG2Iw=="], - "@rollup/rollup-darwin-arm64": ["@rollup/rollup-darwin-arm64@4.59.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg=="], + "@rollup/rollup-darwin-arm64": ["@rollup/rollup-darwin-arm64@4.60.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-qEF7CsKKzSRc20Ciu2Zw1wRrBz4g56F7r/vRwY430UPp/nt1x21Q/fpJ9N5l47WWvJlkNCPJz3QRVw008fi7yA=="], - "@rollup/rollup-darwin-x64": ["@rollup/rollup-darwin-x64@4.59.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w=="], + "@rollup/rollup-darwin-x64": ["@rollup/rollup-darwin-x64@4.60.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-WADYozJ4QCnXCH4wPB+3FuGmDPoFseVCUrANmA5LWwGmC6FL14BWC7pcq+FstOZv3baGX65tZ378uT6WG8ynTw=="], - "@rollup/rollup-freebsd-arm64": ["@rollup/rollup-freebsd-arm64@4.59.0", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA=="], + "@rollup/rollup-freebsd-arm64": ["@rollup/rollup-freebsd-arm64@4.60.0", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-6b8wGHJlDrGeSE3aH5mGNHBjA0TTkxdoNHik5EkvPHCt351XnigA4pS7Wsj/Eo9Y8RBU6f35cjN9SYmCFBtzxw=="], - "@rollup/rollup-freebsd-x64": ["@rollup/rollup-freebsd-x64@4.59.0", "", { "os": "freebsd", "cpu": "x64" }, "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg=="], + "@rollup/rollup-freebsd-x64": ["@rollup/rollup-freebsd-x64@4.60.0", "", { "os": "freebsd", "cpu": "x64" }, "sha512-h25Ga0t4jaylMB8M/JKAyrvvfxGRjnPQIR8lnCayyzEjEOx2EJIlIiMbhpWxDRKGKF8jbNH01NnN663dH638mA=="], - "@rollup/rollup-linux-arm-gnueabihf": ["@rollup/rollup-linux-arm-gnueabihf@4.59.0", "", { "os": "linux", "cpu": "arm" }, 
"sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw=="], + "@rollup/rollup-linux-arm-gnueabihf": ["@rollup/rollup-linux-arm-gnueabihf@4.60.0", "", { "os": "linux", "cpu": "arm" }, "sha512-RzeBwv0B3qtVBWtcuABtSuCzToo2IEAIQrcyB/b2zMvBWVbjo8bZDjACUpnaafaxhTw2W+imQbP2BD1usasK4g=="], - "@rollup/rollup-linux-arm-musleabihf": ["@rollup/rollup-linux-arm-musleabihf@4.59.0", "", { "os": "linux", "cpu": "arm" }, "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA=="], + "@rollup/rollup-linux-arm-musleabihf": ["@rollup/rollup-linux-arm-musleabihf@4.60.0", "", { "os": "linux", "cpu": "arm" }, "sha512-Sf7zusNI2CIU1HLzuu9Tc5YGAHEZs5Lu7N1ssJG4Tkw6e0MEsN7NdjUDDfGNHy2IU+ENyWT+L2obgWiguWibWQ=="], - "@rollup/rollup-linux-arm64-gnu": ["@rollup/rollup-linux-arm64-gnu@4.59.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA=="], + "@rollup/rollup-linux-arm64-gnu": ["@rollup/rollup-linux-arm64-gnu@4.60.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-DX2x7CMcrJzsE91q7/O02IJQ5/aLkVtYFryqCjduJhUfGKG6yJV8hxaw8pZa93lLEpPTP/ohdN4wFz7yp/ry9A=="], - "@rollup/rollup-linux-arm64-musl": ["@rollup/rollup-linux-arm64-musl@4.59.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA=="], + "@rollup/rollup-linux-arm64-musl": ["@rollup/rollup-linux-arm64-musl@4.60.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-09EL+yFVbJZlhcQfShpswwRZ0Rg+z/CsSELFCnPt3iK+iqwGsI4zht3secj5vLEs957QvFFXnzAT0FFPIxSrkQ=="], - "@rollup/rollup-linux-loong64-gnu": ["@rollup/rollup-linux-loong64-gnu@4.59.0", "", { "os": "linux", "cpu": "none" }, "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg=="], + "@rollup/rollup-linux-loong64-gnu": ["@rollup/rollup-linux-loong64-gnu@4.60.0", "", { "os": "linux", "cpu": "none" }, 
"sha512-i9IcCMPr3EXm8EQg5jnja0Zyc1iFxJjZWlb4wr7U2Wx/GrddOuEafxRdMPRYVaXjgbhvqalp6np07hN1w9kAKw=="], - "@rollup/rollup-linux-loong64-musl": ["@rollup/rollup-linux-loong64-musl@4.59.0", "", { "os": "linux", "cpu": "none" }, "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q=="], + "@rollup/rollup-linux-loong64-musl": ["@rollup/rollup-linux-loong64-musl@4.60.0", "", { "os": "linux", "cpu": "none" }, "sha512-DGzdJK9kyJ+B78MCkWeGnpXJ91tK/iKA6HwHxF4TAlPIY7GXEvMe8hBFRgdrR9Ly4qebR/7gfUs9y2IoaVEyog=="], - "@rollup/rollup-linux-ppc64-gnu": ["@rollup/rollup-linux-ppc64-gnu@4.59.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA=="], + "@rollup/rollup-linux-ppc64-gnu": ["@rollup/rollup-linux-ppc64-gnu@4.60.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-RwpnLsqC8qbS8z1H1AxBA1H6qknR4YpPR9w2XX0vo2Sz10miu57PkNcnHVaZkbqyw/kUWfKMI73jhmfi9BRMUQ=="], - "@rollup/rollup-linux-ppc64-musl": ["@rollup/rollup-linux-ppc64-musl@4.59.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA=="], + "@rollup/rollup-linux-ppc64-musl": ["@rollup/rollup-linux-ppc64-musl@4.60.0", "", { "os": "linux", "cpu": "ppc64" }, "sha512-Z8pPf54Ly3aqtdWC3G4rFigZgNvd+qJlOE52fmko3KST9SoGfAdSRCwyoyG05q1HrrAblLbk1/PSIV+80/pxLg=="], - "@rollup/rollup-linux-riscv64-gnu": ["@rollup/rollup-linux-riscv64-gnu@4.59.0", "", { "os": "linux", "cpu": "none" }, "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg=="], + "@rollup/rollup-linux-riscv64-gnu": ["@rollup/rollup-linux-riscv64-gnu@4.60.0", "", { "os": "linux", "cpu": "none" }, "sha512-3a3qQustp3COCGvnP4SvrMHnPQ9d1vzCakQVRTliaz8cIp/wULGjiGpbcqrkv0WrHTEp8bQD/B3HBjzujVWLOA=="], - "@rollup/rollup-linux-riscv64-musl": ["@rollup/rollup-linux-riscv64-musl@4.59.0", "", { "os": "linux", "cpu": "none" }, 
"sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg=="], + "@rollup/rollup-linux-riscv64-musl": ["@rollup/rollup-linux-riscv64-musl@4.60.0", "", { "os": "linux", "cpu": "none" }, "sha512-pjZDsVH/1VsghMJ2/kAaxt6dL0psT6ZexQVrijczOf+PeP2BUqTHYejk3l6TlPRydggINOeNRhvpLa0AYpCWSQ=="], - "@rollup/rollup-linux-s390x-gnu": ["@rollup/rollup-linux-s390x-gnu@4.59.0", "", { "os": "linux", "cpu": "s390x" }, "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w=="], + "@rollup/rollup-linux-s390x-gnu": ["@rollup/rollup-linux-s390x-gnu@4.60.0", "", { "os": "linux", "cpu": "s390x" }, "sha512-3ObQs0BhvPgiUVZrN7gqCSvmFuMWvWvsjG5ayJ3Lraqv+2KhOsp+pUbigqbeWqueGIsnn+09HBw27rJ+gYK4VQ=="], - "@rollup/rollup-linux-x64-gnu": ["@rollup/rollup-linux-x64-gnu@4.59.0", "", { "os": "linux", "cpu": "x64" }, "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg=="], + "@rollup/rollup-linux-x64-gnu": ["@rollup/rollup-linux-x64-gnu@4.60.0", "", { "os": "linux", "cpu": "x64" }, "sha512-EtylprDtQPdS5rXvAayrNDYoJhIz1/vzN2fEubo3yLE7tfAw+948dO0g4M0vkTVFhKojnF+n6C8bDNe+gDRdTg=="], - "@rollup/rollup-linux-x64-musl": ["@rollup/rollup-linux-x64-musl@4.59.0", "", { "os": "linux", "cpu": "x64" }, "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg=="], + "@rollup/rollup-linux-x64-musl": ["@rollup/rollup-linux-x64-musl@4.60.0", "", { "os": "linux", "cpu": "x64" }, "sha512-k09oiRCi/bHU9UVFqD17r3eJR9bn03TyKraCrlz5ULFJGdJGi7VOmm9jl44vOJvRJ6P7WuBi/s2A97LxxHGIdw=="], - "@rollup/rollup-openbsd-x64": ["@rollup/rollup-openbsd-x64@4.59.0", "", { "os": "openbsd", "cpu": "x64" }, "sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ=="], + "@rollup/rollup-openbsd-x64": ["@rollup/rollup-openbsd-x64@4.60.0", "", { "os": "openbsd", "cpu": "x64" }, 
"sha512-1o/0/pIhozoSaDJoDcec+IVLbnRtQmHwPV730+AOD29lHEEo4F5BEUB24H0OBdhbBBDwIOSuf7vgg0Ywxdfiiw=="], - "@rollup/rollup-openharmony-arm64": ["@rollup/rollup-openharmony-arm64@4.59.0", "", { "os": "none", "cpu": "arm64" }, "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA=="], + "@rollup/rollup-openharmony-arm64": ["@rollup/rollup-openharmony-arm64@4.60.0", "", { "os": "none", "cpu": "arm64" }, "sha512-pESDkos/PDzYwtyzB5p/UoNU/8fJo68vcXM9ZW2V0kjYayj1KaaUfi1NmTUTUpMn4UhU4gTuK8gIaFO4UGuMbA=="], - "@rollup/rollup-win32-arm64-msvc": ["@rollup/rollup-win32-arm64-msvc@4.59.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A=="], + "@rollup/rollup-win32-arm64-msvc": ["@rollup/rollup-win32-arm64-msvc@4.60.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-hj1wFStD7B1YBeYmvY+lWXZ7ey73YGPcViMShYikqKT1GtstIKQAtfUI6yrzPjAy/O7pO0VLXGmUVWXQMaYgTQ=="], - "@rollup/rollup-win32-ia32-msvc": ["@rollup/rollup-win32-ia32-msvc@4.59.0", "", { "os": "win32", "cpu": "ia32" }, "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA=="], + "@rollup/rollup-win32-ia32-msvc": ["@rollup/rollup-win32-ia32-msvc@4.60.0", "", { "os": "win32", "cpu": "ia32" }, "sha512-SyaIPFoxmUPlNDq5EHkTbiKzmSEmq/gOYFI/3HHJ8iS/v1mbugVa7dXUzcJGQfoytp9DJFLhHH4U3/eTy2Bq4w=="], - "@rollup/rollup-win32-x64-gnu": ["@rollup/rollup-win32-x64-gnu@4.59.0", "", { "os": "win32", "cpu": "x64" }, "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA=="], + "@rollup/rollup-win32-x64-gnu": ["@rollup/rollup-win32-x64-gnu@4.60.0", "", { "os": "win32", "cpu": "x64" }, "sha512-RdcryEfzZr+lAr5kRm2ucN9aVlCCa2QNq4hXelZxb8GG0NJSazq44Z3PCCc8wISRuCVnGs0lQJVX5Vp6fKA+IA=="], - "@rollup/rollup-win32-x64-msvc": ["@rollup/rollup-win32-x64-msvc@4.59.0", "", { "os": "win32", "cpu": "x64" }, 
"sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA=="], + "@rollup/rollup-win32-x64-msvc": ["@rollup/rollup-win32-x64-msvc@4.60.0", "", { "os": "win32", "cpu": "x64" }, "sha512-PrsWNQ8BuE00O3Xsx3ALh2Df8fAj9+cvvX9AIA6o4KpATR98c9mud4XtDWVvsEuyia5U4tVSTKygawyJkjm60w=="], "@sec-ant/readable-stream": ["@sec-ant/readable-stream@0.4.1", "", {}, "sha512-831qok9r2t8AlxLko40y2ebgSDhenenCatLVeW/uBtnHPyhHOvG0C7TvfgecV+wHzIm5KUICgzmVpWS+IMEAeg=="], "@selftune/local-dashboard": ["@selftune/local-dashboard@workspace:apps/local-dashboard"], + "@selftune/telemetry-contract": ["@selftune/telemetry-contract@file:packages/telemetry-contract", { "dependencies": { "zod": "^4.3.6" } }], + "@selftune/telemetry-contract": ["@selftune/telemetry-contract@workspace:packages/telemetry-contract"], "@selftune/ui": ["@selftune/ui@workspace:packages/ui"], @@ -428,9 +489,9 @@ "@tailwindcss/vite": ["@tailwindcss/vite@4.2.2", "", { "dependencies": { "@tailwindcss/node": "4.2.2", "@tailwindcss/oxide": "4.2.2", "tailwindcss": "4.2.2" }, "peerDependencies": { "vite": "^5.2.0 || ^6 || ^7 || ^8" } }, "sha512-mEiF5HO1QqCLXoNEfXVA1Tzo+cYsrqV7w9Juj2wdUFyW07JRenqMG225MvPwr3ZD9N1bFQj46X7r33iHxLUW0w=="], - "@tanstack/query-core": ["@tanstack/query-core@5.91.2", "", {}, "sha512-Uz2pTgPC1mhqrrSGg18RKCWT/pkduAYtxbcyIyKBhw7dTWjXZIzqmpzO2lBkyWr4hlImQgpu1m1pei3UnkFRWw=="], + "@tanstack/query-core": ["@tanstack/query-core@5.94.5", "", {}, "sha512-Vx1JJiBURW/wdNGP45afjrqn0LfxYwL7K/bSrQvNRtyLGF1bxQPgUXCpzscG29e+UeFOh9hz1KOVala0N+bZiA=="], - "@tanstack/react-query": ["@tanstack/react-query@5.91.3", "", { "dependencies": { "@tanstack/query-core": "5.91.2" }, "peerDependencies": { "react": "^18 || ^19" } }, "sha512-D8jsCexxS5crZxAeiH6VlLHOUzmHOxeW5c11y8rZu0c34u/cy18hUKQXA/gn1Ila3ZIFzP+Pzv76YnliC0EtZQ=="], + "@tanstack/react-query": ["@tanstack/react-query@5.94.5", "", { "dependencies": { "@tanstack/query-core": "5.94.5" }, "peerDependencies": { "react": "^18 || ^19" } }, 
"sha512-1wmrxKFkor+q8l+ygdHmv0Sq5g84Q3p4xvuJ7AdSIAhQQ7udOt+ZSZ19g1Jea3mHqtlTslLGJsmC4vHFgP0P3A=="], "@tanstack/react-table": ["@tanstack/react-table@8.21.3", "", { "dependencies": { "@tanstack/table-core": "8.21.3" }, "peerDependencies": { "react": ">=16.8", "react-dom": ">=16.8" } }, "sha512-5nNMTSETP4ykGegmVkhjcS8tTLW6Vl4axfEGQN3v0zdHYbK4UfoqfPChclTrJ4EoK9QynqAu9oUf8VEmrpZ5Ww=="], @@ -536,7 +597,7 @@ "balanced-match": ["balanced-match@4.0.4", "", {}, "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA=="], - "baseline-browser-mapping": ["baseline-browser-mapping@2.10.9", "", { "bin": { "baseline-browser-mapping": "dist/cli.cjs" } }, "sha512-OZd0e2mU11ClX8+IdXe3r0dbqMEznRiT4TfbhYIbcRPZkqJ7Qwer8ij3GZAmLsRKa+II9V1v5czCkvmHH3XZBg=="], + "baseline-browser-mapping": ["baseline-browser-mapping@2.10.10", "", { "bin": { "baseline-browser-mapping": "dist/cli.cjs" } }, "sha512-sUoJ3IMxx4AyRqO4MLeHlnGDkyXRoUG0/AI9fjK+vS72ekpV0yWVY7O0BVjmBcRtkNcsAO2QDZ4tdKKGoI6YaQ=="], "body-parser": ["body-parser@2.2.2", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.3", "http-errors": "^2.0.0", "iconv-lite": "^0.7.0", "on-finished": "^2.4.1", "qs": "^6.14.1", "raw-body": "^3.0.1", "type-is": "^2.0.1" } }, "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA=="], @@ -1024,7 +1085,7 @@ "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - "msw": ["msw@2.12.13", "", { "dependencies": { "@inquirer/confirm": "^5.0.0", "@mswjs/interceptors": "^0.41.2", "@open-draft/deferred-promise": "^2.2.0", "@types/statuses": "^2.0.6", "cookie": "^1.0.2", "graphql": "^16.12.0", "headers-polyfill": "^4.0.2", "is-node-process": "^1.2.0", "outvariant": "^1.4.3", "path-to-regexp": "^6.3.0", "picocolors": "^1.1.1", "rettime": "^0.10.1", "statuses": "^2.0.2", "strict-event-emitter": "^0.5.1", "tough-cookie": "^6.0.0", 
"type-fest": "^5.2.0", "until-async": "^3.0.2", "yargs": "^17.7.2" }, "peerDependencies": { "typescript": ">= 4.8.x" }, "optionalPeers": ["typescript"], "bin": { "msw": "cli/index.js" } }, "sha512-9CV2mXT9+z0J26MQDfEZZkj/psJ5Er/w0w+t95FWdaGH/DTlhNZBx8vBO5jSYv8AZEnl3ouX+AaTT68KXdAIag=="], + "msw": ["msw@2.12.14", "", { "dependencies": { "@inquirer/confirm": "^5.0.0", "@mswjs/interceptors": "^0.41.2", "@open-draft/deferred-promise": "^2.2.0", "@types/statuses": "^2.0.6", "cookie": "^1.0.2", "graphql": "^16.12.0", "headers-polyfill": "^4.0.2", "is-node-process": "^1.2.0", "outvariant": "^1.4.3", "path-to-regexp": "^6.3.0", "picocolors": "^1.1.1", "rettime": "^0.10.1", "statuses": "^2.0.2", "strict-event-emitter": "^0.5.1", "tough-cookie": "^6.0.0", "type-fest": "^5.2.0", "until-async": "^3.0.2", "yargs": "^17.7.2" }, "peerDependencies": { "typescript": ">= 4.8.x" }, "optionalPeers": ["typescript"], "bin": { "msw": "cli/index.js" } }, "sha512-4KXa4nVBIBjbDbd7vfQNuQ25eFxug0aropCQFoI0JdOBuJWamkT1yLVIWReFI8SiTRc+H1hKzaNk+cLk2N9rtQ=="], "mute-stream": ["mute-stream@2.0.0", "", {}, "sha512-WWdIxpyjEn+FhQJQQv9aQAYlHoNVdzIzUySNV1gHUPDSdZJ3yZn7pAAbQcV7B56Mvu881q9FZV+0Vx2xC44VWA=="], @@ -1060,6 +1121,10 @@ "outvariant": ["outvariant@1.4.3", "", {}, "sha512-+Sl2UErvtsoajRDKCE5/dBz4DIvHXQQnAxtQTF04OJxY0+DyZXSo5P5Bb7XYWOh81syohlYL24hbDwxedPUJCA=="], + "oxfmt": ["oxfmt@0.41.0", "", { "dependencies": { "tinypool": "2.1.0" }, "optionalDependencies": { "@oxfmt/binding-android-arm-eabi": "0.41.0", "@oxfmt/binding-android-arm64": "0.41.0", "@oxfmt/binding-darwin-arm64": "0.41.0", "@oxfmt/binding-darwin-x64": "0.41.0", "@oxfmt/binding-freebsd-x64": "0.41.0", "@oxfmt/binding-linux-arm-gnueabihf": "0.41.0", "@oxfmt/binding-linux-arm-musleabihf": "0.41.0", "@oxfmt/binding-linux-arm64-gnu": "0.41.0", "@oxfmt/binding-linux-arm64-musl": "0.41.0", "@oxfmt/binding-linux-ppc64-gnu": "0.41.0", "@oxfmt/binding-linux-riscv64-gnu": "0.41.0", "@oxfmt/binding-linux-riscv64-musl": "0.41.0", 
"@oxfmt/binding-linux-s390x-gnu": "0.41.0", "@oxfmt/binding-linux-x64-gnu": "0.41.0", "@oxfmt/binding-linux-x64-musl": "0.41.0", "@oxfmt/binding-openharmony-arm64": "0.41.0", "@oxfmt/binding-win32-arm64-msvc": "0.41.0", "@oxfmt/binding-win32-ia32-msvc": "0.41.0", "@oxfmt/binding-win32-x64-msvc": "0.41.0" }, "bin": { "oxfmt": "bin/oxfmt" } }, "sha512-sKLdJZdQ3bw6x9qKiT7+eID4MNEXlDHf5ZacfIircrq6Qwjk0L6t2/JQlZZrVHTXJawK3KaMuBoJnEJPcqCEdg=="], + + "oxlint": ["oxlint@1.56.0", "", { "optionalDependencies": { "@oxlint/binding-android-arm-eabi": "1.56.0", "@oxlint/binding-android-arm64": "1.56.0", "@oxlint/binding-darwin-arm64": "1.56.0", "@oxlint/binding-darwin-x64": "1.56.0", "@oxlint/binding-freebsd-x64": "1.56.0", "@oxlint/binding-linux-arm-gnueabihf": "1.56.0", "@oxlint/binding-linux-arm-musleabihf": "1.56.0", "@oxlint/binding-linux-arm64-gnu": "1.56.0", "@oxlint/binding-linux-arm64-musl": "1.56.0", "@oxlint/binding-linux-ppc64-gnu": "1.56.0", "@oxlint/binding-linux-riscv64-gnu": "1.56.0", "@oxlint/binding-linux-riscv64-musl": "1.56.0", "@oxlint/binding-linux-s390x-gnu": "1.56.0", "@oxlint/binding-linux-x64-gnu": "1.56.0", "@oxlint/binding-linux-x64-musl": "1.56.0", "@oxlint/binding-openharmony-arm64": "1.56.0", "@oxlint/binding-win32-arm64-msvc": "1.56.0", "@oxlint/binding-win32-ia32-msvc": "1.56.0", "@oxlint/binding-win32-x64-msvc": "1.56.0" }, "peerDependencies": { "oxlint-tsgolint": ">=0.15.0" }, "optionalPeers": ["oxlint-tsgolint"], "bin": { "oxlint": "bin/oxlint" } }, "sha512-Q+5Mj5PVaH/R6/fhMMFzw4dT+KPB+kQW4kaL8FOIq7tfhlnEVp6+3lcWqFruuTNlUo9srZUW3qH7Id4pskeR6g=="], + "parent-module": ["parent-module@1.0.1", "", { "dependencies": { "callsites": "^3.0.0" } }, "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g=="], "parse-entities": ["parse-entities@4.0.2", "", { "dependencies": { "@types/unist": "^2.0.0", "character-entities-legacy": "^3.0.0", "character-reference-invalid": "^2.0.0", "decode-named-character-reference": 
"^1.0.0", "is-alphanumerical": "^2.0.0", "is-decimal": "^2.0.0", "is-hexadecimal": "^2.0.0" } }, "sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw=="], @@ -1158,7 +1223,7 @@ "reusify": ["reusify@1.1.0", "", {}, "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw=="], - "rollup": ["rollup@4.59.0", "", { "dependencies": { "@types/estree": "1.0.8" }, "optionalDependencies": { "@rollup/rollup-android-arm-eabi": "4.59.0", "@rollup/rollup-android-arm64": "4.59.0", "@rollup/rollup-darwin-arm64": "4.59.0", "@rollup/rollup-darwin-x64": "4.59.0", "@rollup/rollup-freebsd-arm64": "4.59.0", "@rollup/rollup-freebsd-x64": "4.59.0", "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", "@rollup/rollup-linux-arm-musleabihf": "4.59.0", "@rollup/rollup-linux-arm64-gnu": "4.59.0", "@rollup/rollup-linux-arm64-musl": "4.59.0", "@rollup/rollup-linux-loong64-gnu": "4.59.0", "@rollup/rollup-linux-loong64-musl": "4.59.0", "@rollup/rollup-linux-ppc64-gnu": "4.59.0", "@rollup/rollup-linux-ppc64-musl": "4.59.0", "@rollup/rollup-linux-riscv64-gnu": "4.59.0", "@rollup/rollup-linux-riscv64-musl": "4.59.0", "@rollup/rollup-linux-s390x-gnu": "4.59.0", "@rollup/rollup-linux-x64-gnu": "4.59.0", "@rollup/rollup-linux-x64-musl": "4.59.0", "@rollup/rollup-openbsd-x64": "4.59.0", "@rollup/rollup-openharmony-arm64": "4.59.0", "@rollup/rollup-win32-arm64-msvc": "4.59.0", "@rollup/rollup-win32-ia32-msvc": "4.59.0", "@rollup/rollup-win32-x64-gnu": "4.59.0", "@rollup/rollup-win32-x64-msvc": "4.59.0", "fsevents": "~2.3.2" }, "bin": { "rollup": "dist/bin/rollup" } }, "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg=="], + "rollup": ["rollup@4.60.0", "", { "dependencies": { "@types/estree": "1.0.8" }, "optionalDependencies": { "@rollup/rollup-android-arm-eabi": "4.60.0", "@rollup/rollup-android-arm64": "4.60.0", "@rollup/rollup-darwin-arm64": "4.60.0", "@rollup/rollup-darwin-x64": 
"4.60.0", "@rollup/rollup-freebsd-arm64": "4.60.0", "@rollup/rollup-freebsd-x64": "4.60.0", "@rollup/rollup-linux-arm-gnueabihf": "4.60.0", "@rollup/rollup-linux-arm-musleabihf": "4.60.0", "@rollup/rollup-linux-arm64-gnu": "4.60.0", "@rollup/rollup-linux-arm64-musl": "4.60.0", "@rollup/rollup-linux-loong64-gnu": "4.60.0", "@rollup/rollup-linux-loong64-musl": "4.60.0", "@rollup/rollup-linux-ppc64-gnu": "4.60.0", "@rollup/rollup-linux-ppc64-musl": "4.60.0", "@rollup/rollup-linux-riscv64-gnu": "4.60.0", "@rollup/rollup-linux-riscv64-musl": "4.60.0", "@rollup/rollup-linux-s390x-gnu": "4.60.0", "@rollup/rollup-linux-x64-gnu": "4.60.0", "@rollup/rollup-linux-x64-musl": "4.60.0", "@rollup/rollup-openbsd-x64": "4.60.0", "@rollup/rollup-openharmony-arm64": "4.60.0", "@rollup/rollup-win32-arm64-msvc": "4.60.0", "@rollup/rollup-win32-ia32-msvc": "4.60.0", "@rollup/rollup-win32-x64-gnu": "4.60.0", "@rollup/rollup-win32-x64-msvc": "4.60.0", "fsevents": "~2.3.2" }, "bin": { "rollup": "dist/bin/rollup" } }, "sha512-yqjxruMGBQJ2gG4HtjZtAfXArHomazDHoFwFFmZZl0r7Pdo7qCIXKqKHZc8yeoMgzJJ+pO6pEEHa+V7uzWlrAQ=="], "router": ["router@2.2.0", "", { "dependencies": { "debug": "^4.4.0", "depd": "^2.0.0", "is-promise": "^4.0.0", "parseurl": "^1.3.3", "path-to-regexp": "^8.0.0" } }, "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ=="], @@ -1260,15 +1325,15 @@ "tinyglobby": ["tinyglobby@0.2.15", "", { "dependencies": { "fdir": "^6.5.0", "picomatch": "^4.0.3" } }, "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ=="], - "tinypool": ["tinypool@1.1.1", "", {}, "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg=="], + "tinypool": ["tinypool@2.1.0", "", {}, "sha512-Pugqs6M0m7Lv1I7FtxN4aoyToKg1C4tu+/381vH35y8oENM/Ai7f7C4StcoK4/+BSw9ebcS8jRiVrORFKCALLw=="], "tinyrainbow": ["tinyrainbow@2.0.0", "", {}, 
"sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw=="], "tinyspy": ["tinyspy@4.0.4", "", {}, "sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q=="], - "tldts": ["tldts@7.0.26", "", { "dependencies": { "tldts-core": "^7.0.26" }, "bin": { "tldts": "bin/cli.js" } }, "sha512-WiGwQjr0qYdNNG8KpMKlSvpxz652lqa3Rd+/hSaDcY4Uo6SKWZq2LAF+hsAhUewTtYhXlorBKgNF3Kk8hnjGoQ=="], + "tldts": ["tldts@7.0.27", "", { "dependencies": { "tldts-core": "^7.0.27" }, "bin": { "tldts": "bin/cli.js" } }, "sha512-I4FZcVFcqCRuT0ph6dCDpPuO4Xgzvh+spkcTr1gK7peIvxWauoloVO0vuy1FQnijT63ss6AsHB6+OIM4aXHbPg=="], - "tldts-core": ["tldts-core@7.0.26", "", {}, "sha512-5WJ2SqFsv4G2Dwi7ZFVRnz6b2H1od39QME1lc2y5Ew3eWiZMAeqOAfWpRP9jHvhUl881406QtZTODvjttJs+ew=="], + "tldts-core": ["tldts-core@7.0.27", "", {}, "sha512-YQ7uPjgWUibIK6DW5lrKujGwUKhLevU4hcGbP5O6TcIUb+oTjJYJVWPS4nZsIHrEEEG6myk/oqAJUEQmpZrHsg=="], "to-regex-range": ["to-regex-range@5.0.1", "", { "dependencies": { "is-number": "^7.0.0" } }, "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ=="], @@ -1430,6 +1495,8 @@ "strip-literal/js-tokens": ["js-tokens@9.0.1", "", {}, "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ=="], + "vitest/tinypool": ["tinypool@1.1.1", "", {}, "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg=="], + "wrap-ansi/strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], "@dotenvx/dotenvx/execa/get-stream": ["get-stream@6.0.1", "", {}, "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg=="], diff --git a/cli/selftune/activation-rules.ts b/cli/selftune/activation-rules.ts index e7649644..da6f1940 100644 --- a/cli/selftune/activation-rules.ts +++ 
b/cli/selftune/activation-rules.ts @@ -12,6 +12,7 @@ import { existsSync, readdirSync, readFileSync } from "node:fs"; import { dirname, join } from "node:path"; + import { EVOLUTION_AUDIT_LOG, QUERY_LOG } from "./constants.js"; import { getDb } from "./localdb/db.js"; import { queryEvolutionAudit, queryQueryLog, querySkillUsageRecords } from "./localdb/queries.js"; diff --git a/cli/selftune/alpha-upload/build-payloads.ts b/cli/selftune/alpha-upload/build-payloads.ts index ccb0cd84..e2647883 100644 --- a/cli/selftune/alpha-upload/build-payloads.ts +++ b/cli/selftune/alpha-upload/build-payloads.ts @@ -10,7 +10,9 @@ */ import type { Database } from "bun:sqlite"; + import type { CanonicalRecord } from "@selftune/telemetry-contract"; + import { buildPushPayloadV2 } from "../canonical-export.js"; import type { EvolutionEvidenceEntry } from "../types.js"; diff --git a/cli/selftune/alpha-upload/stage-canonical.ts b/cli/selftune/alpha-upload/stage-canonical.ts index 85ac9e8f..cdb70ebf 100644 --- a/cli/selftune/alpha-upload/stage-canonical.ts +++ b/cli/selftune/alpha-upload/stage-canonical.ts @@ -11,8 +11,10 @@ import type { Database } from "bun:sqlite"; import { createHash } from "node:crypto"; + import type { CanonicalRecord } from "@selftune/telemetry-contract"; import { isCanonicalRecord } from "@selftune/telemetry-contract"; + import { CANONICAL_LOG } from "../constants.js"; import { getOrchestrateRuns, diff --git a/cli/selftune/auto-update.ts b/cli/selftune/auto-update.ts index dbea38d6..2d82682f 100644 --- a/cli/selftune/auto-update.ts +++ b/cli/selftune/auto-update.ts @@ -9,6 +9,7 @@ import { spawnSync } from "node:child_process"; import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; import { join } from "node:path"; + import { SELFTUNE_CONFIG_DIR } from "./constants.js"; const UPDATE_CHECK_PATH = join(SELFTUNE_CONFIG_DIR, "update-check.json"); diff --git a/cli/selftune/badge/badge.ts b/cli/selftune/badge/badge.ts index 5fe96d75..fe54ea34 
100644 --- a/cli/selftune/badge/badge.ts +++ b/cli/selftune/badge/badge.ts @@ -8,6 +8,7 @@ import { writeFileSync } from "node:fs"; import { parseArgs } from "node:util"; + import { getDb } from "../localdb/db.js"; import { queryEvolutionAudit, diff --git a/cli/selftune/canonical-export.ts b/cli/selftune/canonical-export.ts index 3331f220..b2805041 100644 --- a/cli/selftune/canonical-export.ts +++ b/cli/selftune/canonical-export.ts @@ -4,6 +4,7 @@ import { randomUUID } from "node:crypto"; import { readFileSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { parseArgs } from "node:util"; + import { CANONICAL_LOG, CLAUDE_CODE_PROJECTS_DIR } from "./constants.js"; import { buildCanonicalRecordsFromReplay, diff --git a/cli/selftune/contribute/bundle.ts b/cli/selftune/contribute/bundle.ts index 1f0a8c7a..adc68a3b 100644 --- a/cli/selftune/contribute/bundle.ts +++ b/cli/selftune/contribute/bundle.ts @@ -8,6 +8,7 @@ import { randomUUID } from "node:crypto"; import { existsSync, readdirSync, readFileSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; + import { EVOLUTION_AUDIT_LOG, QUERY_LOG, diff --git a/cli/selftune/contribute/contribute.ts b/cli/selftune/contribute/contribute.ts index 84ed5e39..57f18d37 100644 --- a/cli/selftune/contribute/contribute.ts +++ b/cli/selftune/contribute/contribute.ts @@ -10,6 +10,7 @@ import { spawnSync } from "node:child_process"; import { existsSync, mkdirSync, writeFileSync } from "node:fs"; import { parseArgs } from "node:util"; + import { CONTRIBUTIONS_DIR } from "../constants.js"; import { assembleBundle } from "./bundle.js"; import { sanitizeBundle } from "./sanitize.js"; diff --git a/cli/selftune/dashboard-server.ts b/cli/selftune/dashboard-server.ts index 9508934f..3d600b65 100644 --- a/cli/selftune/dashboard-server.ts +++ b/cli/selftune/dashboard-server.ts @@ -19,6 +19,7 @@ import type { Database } from "bun:sqlite"; import { existsSync, readFileSync, unwatchFile, 
watchFile } from "node:fs"; import { dirname, extname, isAbsolute, join, relative, resolve } from "node:path"; + import type { BadgeFormat } from "./badge/badge-svg.js"; import { LOG_DIR, SELFTUNE_CONFIG_DIR } from "./constants.js"; import type { diff --git a/cli/selftune/eval/hooks-to-evals.ts b/cli/selftune/eval/hooks-to-evals.ts index 0213de82..242f1f9e 100644 --- a/cli/selftune/eval/hooks-to-evals.ts +++ b/cli/selftune/eval/hooks-to-evals.ts @@ -21,6 +21,7 @@ import { writeFileSync } from "node:fs"; import { parseArgs } from "node:util"; + import { GENERIC_NEGATIVES, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js"; import { getDb } from "../localdb/db.js"; import { diff --git a/cli/selftune/eval/import-skillsbench.ts b/cli/selftune/eval/import-skillsbench.ts index 0c61eb45..f4355394 100644 --- a/cli/selftune/eval/import-skillsbench.ts +++ b/cli/selftune/eval/import-skillsbench.ts @@ -13,6 +13,7 @@ import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { parseArgs } from "node:util"; + import type { EvalEntry, SkillsBenchTask } from "../types.js"; // --------------------------------------------------------------------------- diff --git a/cli/selftune/eval/synthetic-evals.ts b/cli/selftune/eval/synthetic-evals.ts index 28d76f46..0117afce 100644 --- a/cli/selftune/eval/synthetic-evals.ts +++ b/cli/selftune/eval/synthetic-evals.ts @@ -181,9 +181,8 @@ export async function generateSyntheticEvals( try { const { getDb } = await import("../localdb/db.js"); const { querySkillUsageRecords, queryQueryLog } = await import("../localdb/queries.js"); - const { isHighConfidencePositiveSkillRecord } = await import( - "../utils/skill-usage-confidence.js" - ); + const { isHighConfidencePositiveSkillRecord } = + await import("../utils/skill-usage-confidence.js"); const db = getDb(); diff --git a/cli/selftune/eval/unit-test.ts b/cli/selftune/eval/unit-test.ts index 2dcd09db..f9d455d1 100644 --- 
a/cli/selftune/eval/unit-test.ts +++ b/cli/selftune/eval/unit-test.ts @@ -12,6 +12,7 @@ */ import { existsSync, readFileSync } from "node:fs"; + import type { SkillAssertion, SkillUnitTest, diff --git a/cli/selftune/evolution/deploy-proposal.ts b/cli/selftune/evolution/deploy-proposal.ts index e3201e97..5bd211fd 100644 --- a/cli/selftune/evolution/deploy-proposal.ts +++ b/cli/selftune/evolution/deploy-proposal.ts @@ -7,6 +7,7 @@ */ import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs"; + import type { EvolutionProposal, SkillSections } from "../types.js"; import type { ValidationResult } from "./validate-proposal.js"; diff --git a/cli/selftune/evolution/evolve-body.ts b/cli/selftune/evolution/evolve-body.ts index 4324ea55..6cc3ccd7 100644 --- a/cli/selftune/evolution/evolve-body.ts +++ b/cli/selftune/evolution/evolve-body.ts @@ -25,7 +25,6 @@ import type { QueryLogRecord, SkillUsageRecord, } from "../types.js"; - import { appendAuditEntry } from "./audit.js"; import { checkConstitutionSizeOnly } from "./constitutional.js"; import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js"; diff --git a/cli/selftune/evolution/evolve.ts b/cli/selftune/evolution/evolve.ts index a2f8b6bc..c7f640b1 100644 --- a/cli/selftune/evolution/evolve.ts +++ b/cli/selftune/evolution/evolve.ts @@ -37,7 +37,6 @@ import type { SkillUsageRecord, } from "../types.js"; import { parseFrontmatter, replaceFrontmatterDescription } from "../utils/frontmatter.js"; - import { createEvolveTUI } from "../utils/tui.js"; import { appendAuditEntry } from "./audit.js"; import { checkConstitution } from "./constitutional.js"; diff --git a/cli/selftune/export.ts b/cli/selftune/export.ts index edf4d711..93773d29 100644 --- a/cli/selftune/export.ts +++ b/cli/selftune/export.ts @@ -5,6 +5,7 @@ */ import { mkdirSync, writeFileSync } from "node:fs"; import { join } from "node:path"; + import { getDb } from "./localdb/db.js"; import { getOrchestrateRuns, diff 
--git a/cli/selftune/hooks/auto-activate.ts b/cli/selftune/hooks/auto-activate.ts index 61b4dc8f..3ac768af 100644 --- a/cli/selftune/hooks/auto-activate.ts +++ b/cli/selftune/hooks/auto-activate.ts @@ -11,6 +11,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; import { dirname } from "node:path"; + import { CLAUDE_SETTINGS_PATH, EVOLUTION_AUDIT_LOG, diff --git a/cli/selftune/hooks/evolution-guard.ts b/cli/selftune/hooks/evolution-guard.ts index 537d0a7c..c01aa0e4 100644 --- a/cli/selftune/hooks/evolution-guard.ts +++ b/cli/selftune/hooks/evolution-guard.ts @@ -16,8 +16,8 @@ import { existsSync, readFileSync } from "node:fs"; import { basename, dirname, join } from "node:path"; -import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js"; +import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js"; import type { PreToolUsePayload } from "../types.js"; import { readJsonl } from "../utils/jsonl.js"; diff --git a/cli/selftune/hooks/prompt-log.ts b/cli/selftune/hooks/prompt-log.ts index 01382c32..adda1423 100644 --- a/cli/selftune/hooks/prompt-log.ts +++ b/cli/selftune/hooks/prompt-log.ts @@ -11,6 +11,7 @@ import { readdirSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; + import { CANONICAL_LOG, QUERY_LOG, SKIP_PREFIXES } from "../constants.js"; import { appendCanonicalRecord, diff --git a/cli/selftune/hooks/session-stop.ts b/cli/selftune/hooks/session-stop.ts index f8572550..31c5281d 100644 --- a/cli/selftune/hooks/session-stop.ts +++ b/cli/selftune/hooks/session-stop.ts @@ -10,8 +10,8 @@ import { execSync } from "node:child_process"; import { closeSync, openSync, readFileSync, unlinkSync, writeFileSync } from "node:fs"; -import { CANONICAL_LOG, ORCHESTRATE_LOCK, TELEMETRY_LOG } from "../constants.js"; +import { CANONICAL_LOG, ORCHESTRATE_LOCK, TELEMETRY_LOG } from "../constants.js"; import { appendCanonicalRecords, buildCanonicalExecutionFact, diff --git 
a/cli/selftune/hooks/skill-change-guard.ts b/cli/selftune/hooks/skill-change-guard.ts index 711c61a8..31776593 100644 --- a/cli/selftune/hooks/skill-change-guard.ts +++ b/cli/selftune/hooks/skill-change-guard.ts @@ -12,6 +12,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; import { basename, dirname } from "node:path"; + import { SESSION_STATE_DIR } from "../constants.js"; import type { PreToolUsePayload } from "../types.js"; diff --git a/cli/selftune/hooks/skill-eval.ts b/cli/selftune/hooks/skill-eval.ts index 09249c09..aca07014 100644 --- a/cli/selftune/hooks/skill-eval.ts +++ b/cli/selftune/hooks/skill-eval.ts @@ -13,6 +13,7 @@ import { existsSync, readFileSync } from "node:fs"; import { basename, dirname } from "node:path"; + import { CANONICAL_LOG, SKILL_LOG } from "../constants.js"; import { appendCanonicalRecord, @@ -24,7 +25,6 @@ import { getLatestPromptIdentity, } from "../normalization.js"; import type { PostToolUsePayload, SkillUsageRecord } from "../types.js"; - import { classifySkillPath } from "../utils/skill-discovery.js"; import { getLastUserMessage } from "../utils/transcript.js"; diff --git a/cli/selftune/index.ts b/cli/selftune/index.ts index 73634592..07974ff9 100644 --- a/cli/selftune/index.ts +++ b/cli/selftune/index.ts @@ -606,9 +606,8 @@ Output: const { readAlphaIdentity } = await import("./alpha-identity.js"); const { getDb } = await import("./localdb/db.js"); const { runUploadCycle } = await import("./alpha-upload/index.js"); - const { getSelftuneVersion, readConfiguredAgentType } = await import( - "./utils/selftune-meta.js" - ); + const { getSelftuneVersion, readConfiguredAgentType } = + await import("./utils/selftune-meta.js"); const identity = readAlphaIdentity(SELFTUNE_CONFIG_PATH); if (!identity?.enrolled) { @@ -670,9 +669,8 @@ Output: } case "relink": { const { SELFTUNE_CONFIG_PATH } = await import("./constants.js"); - const { readAlphaIdentity, writeAlphaIdentity, generateUserId } = await import( - 
"./alpha-identity.js" - ); + const { readAlphaIdentity, writeAlphaIdentity, generateUserId } = + await import("./alpha-identity.js"); const { buildVerificationUrl, pollDeviceCode, requestDeviceCode, tryOpenUrl } = await import("./auth/device-code.js"); const { chmodSync } = await import("node:fs"); diff --git a/cli/selftune/ingestors/claude-replay.ts b/cli/selftune/ingestors/claude-replay.ts index e072d148..ccd137b3 100644 --- a/cli/selftune/ingestors/claude-replay.ts +++ b/cli/selftune/ingestors/claude-replay.ts @@ -24,6 +24,7 @@ import { statSync } from "node:fs"; import { basename } from "node:path"; import { parseArgs } from "node:util"; + import { CANONICAL_LOG, CLAUDE_CODE_MARKER, diff --git a/cli/selftune/ingestors/codex-rollout.ts b/cli/selftune/ingestors/codex-rollout.ts index b77ce854..95745251 100644 --- a/cli/selftune/ingestors/codex-rollout.ts +++ b/cli/selftune/ingestors/codex-rollout.ts @@ -25,6 +25,7 @@ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; import { homedir } from "node:os"; import { basename, join } from "node:path"; import { parseArgs } from "node:util"; + import { CANONICAL_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js"; import { appendCanonicalRecords, diff --git a/cli/selftune/ingestors/codex-wrapper.ts b/cli/selftune/ingestors/codex-wrapper.ts index 76aa6945..a297fc8d 100644 --- a/cli/selftune/ingestors/codex-wrapper.ts +++ b/cli/selftune/ingestors/codex-wrapper.ts @@ -19,6 +19,7 @@ import { homedir } from "node:os"; import { join } from "node:path"; + import { CANONICAL_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js"; import { appendCanonicalRecords, diff --git a/cli/selftune/ingestors/openclaw-ingest.ts b/cli/selftune/ingestors/openclaw-ingest.ts index 3f11c478..63474fe3 100644 --- a/cli/selftune/ingestors/openclaw-ingest.ts +++ b/cli/selftune/ingestors/openclaw-ingest.ts @@ -25,6 +25,7 @@ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; 
import { homedir } from "node:os"; import { basename, join } from "node:path"; import { parseArgs } from "node:util"; + import { CANONICAL_LOG, OPENCLAW_AGENTS_DIR, diff --git a/cli/selftune/ingestors/opencode-ingest.ts b/cli/selftune/ingestors/opencode-ingest.ts index 32f4aac6..1c0f51cd 100644 --- a/cli/selftune/ingestors/opencode-ingest.ts +++ b/cli/selftune/ingestors/opencode-ingest.ts @@ -25,6 +25,7 @@ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; import { homedir } from "node:os"; import { basename, join } from "node:path"; import { parseArgs } from "node:util"; + import { CANONICAL_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js"; import { appendCanonicalRecords, diff --git a/cli/selftune/localdb/db.ts b/cli/selftune/localdb/db.ts index c4e8f0e6..46d28473 100644 --- a/cli/selftune/localdb/db.ts +++ b/cli/selftune/localdb/db.ts @@ -11,6 +11,7 @@ import { Database } from "bun:sqlite"; import { existsSync, mkdirSync } from "node:fs"; import { dirname, join } from "node:path"; + import { SELFTUNE_CONFIG_DIR } from "../constants.js"; import { ALL_DDL, MIGRATIONS, POST_MIGRATION_INDEXES } from "./schema.js"; diff --git a/cli/selftune/localdb/direct-write.ts b/cli/selftune/localdb/direct-write.ts index 38abdbde..b6984d7c 100644 --- a/cli/selftune/localdb/direct-write.ts +++ b/cli/selftune/localdb/direct-write.ts @@ -10,6 +10,7 @@ */ import type { Database } from "bun:sqlite"; + import type { CanonicalExecutionFactRecord, CanonicalPromptRecord, @@ -17,6 +18,7 @@ import type { CanonicalSessionRecord, CanonicalSkillInvocationRecord, } from "@selftune/telemetry-contract"; + import type { OrchestrateRunReport } from "../dashboard-contract.js"; import type { EvolutionAuditEntry, diff --git a/cli/selftune/localdb/materialize.ts b/cli/selftune/localdb/materialize.ts index d81a098c..41a8ee00 100644 --- a/cli/selftune/localdb/materialize.ts +++ b/cli/selftune/localdb/materialize.ts @@ -14,6 +14,7 @@ // 3. 
Backfill from batch ingestors that don't yet dual-write import type { Database } from "bun:sqlite"; + import { type CanonicalExecutionFactRecord, type CanonicalPromptRecord, @@ -22,6 +23,7 @@ import { type CanonicalSkillInvocationRecord, isCanonicalRecord, } from "@selftune/telemetry-contract"; + import { CANONICAL_LOG, EVOLUTION_AUDIT_LOG, diff --git a/cli/selftune/localdb/queries.ts b/cli/selftune/localdb/queries.ts index 46b54325..632d8a8e 100644 --- a/cli/selftune/localdb/queries.ts +++ b/cli/selftune/localdb/queries.ts @@ -6,6 +6,7 @@ */ import type { Database } from "bun:sqlite"; + import type { OrchestrateRunReport, OverviewPayload, diff --git a/cli/selftune/normalization.ts b/cli/selftune/normalization.ts index 86356b37..0cc0791f 100644 --- a/cli/selftune/normalization.ts +++ b/cli/selftune/normalization.ts @@ -24,6 +24,7 @@ import { writeFileSync, } from "node:fs"; import { basename, dirname } from "node:path"; + import { CANONICAL_LOG, canonicalSessionStatePath } from "./constants.js"; import { writeCanonicalBatchToDb, writeCanonicalToDb } from "./localdb/direct-write.js"; import { diff --git a/cli/selftune/observability.ts b/cli/selftune/observability.ts index 2c93e10a..1c361d95 100644 --- a/cli/selftune/observability.ts +++ b/cli/selftune/observability.ts @@ -11,6 +11,7 @@ import { existsSync, readFileSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; + import { getAlphaGuidance } from "./agent-guidance.js"; import { getAlphaLinkState, readAlphaIdentity } from "./alpha-identity.js"; import { LOG_DIR, REQUIRED_FIELDS, SELFTUNE_CONFIG_PATH } from "./constants.js"; diff --git a/cli/selftune/repair/skill-usage.ts b/cli/selftune/repair/skill-usage.ts index 8d3dfbb1..71a63e0e 100644 --- a/cli/selftune/repair/skill-usage.ts +++ b/cli/selftune/repair/skill-usage.ts @@ -3,6 +3,7 @@ import { existsSync, readFileSync, statSync } from "node:fs"; import { basename, dirname, join } from "node:path"; import { parseArgs } from 
"node:util"; + import { CLAUDE_CODE_PROJECTS_DIR, QUERY_LOG, diff --git a/cli/selftune/routes/orchestrate-runs.ts b/cli/selftune/routes/orchestrate-runs.ts index 77c814f6..d1aeb604 100644 --- a/cli/selftune/routes/orchestrate-runs.ts +++ b/cli/selftune/routes/orchestrate-runs.ts @@ -5,6 +5,7 @@ */ import type { Database } from "bun:sqlite"; + import { getOrchestrateRuns } from "../localdb/queries.js"; export function handleOrchestrateRuns(db: Database, limit: number): Response { diff --git a/cli/selftune/routes/overview.ts b/cli/selftune/routes/overview.ts index b7ec55b3..f27d7802 100644 --- a/cli/selftune/routes/overview.ts +++ b/cli/selftune/routes/overview.ts @@ -5,6 +5,7 @@ */ import type { Database } from "bun:sqlite"; + import { getOverviewPayload, getSkillsList } from "../localdb/queries.js"; export function handleOverview(db: Database, version: string): Response { diff --git a/cli/selftune/routes/skill-report.ts b/cli/selftune/routes/skill-report.ts index cb885b5a..c37a2849 100644 --- a/cli/selftune/routes/skill-report.ts +++ b/cli/selftune/routes/skill-report.ts @@ -7,6 +7,7 @@ */ import type { Database } from "bun:sqlite"; + import { getPendingProposals, getSkillReportPayload, safeParseJson } from "../localdb/queries.js"; export function handleSkillReport(db: Database, skillName: string): Response { diff --git a/cli/selftune/sync.ts b/cli/selftune/sync.ts index b4336bce..301ab439 100644 --- a/cli/selftune/sync.ts +++ b/cli/selftune/sync.ts @@ -17,6 +17,7 @@ import { existsSync } from "node:fs"; import { homedir } from "node:os"; import { join } from "node:path"; import { parseArgs } from "node:util"; + import { CLAUDE_CODE_MARKER, CLAUDE_CODE_PROJECTS_DIR, diff --git a/cli/selftune/utils/canonical-log.ts b/cli/selftune/utils/canonical-log.ts index c70ba8c8..3603dac7 100644 --- a/cli/selftune/utils/canonical-log.ts +++ b/cli/selftune/utils/canonical-log.ts @@ -1,10 +1,12 @@ import { existsSync, writeFileSync } from "node:fs"; + import { type 
CanonicalPlatform, type CanonicalRecord, type CanonicalRecordKind, isCanonicalRecord, } from "@selftune/telemetry-contract"; + import { CANONICAL_LOG } from "../constants.js"; import { readJsonl } from "./jsonl.js"; diff --git a/cli/selftune/utils/jsonl.ts b/cli/selftune/utils/jsonl.ts index 6769e211..f499b411 100644 --- a/cli/selftune/utils/jsonl.ts +++ b/cli/selftune/utils/jsonl.ts @@ -14,6 +14,7 @@ import { writeFileSync, } from "node:fs"; import { dirname } from "node:path"; + import { createLogger } from "./logging.js"; import type { LogType } from "./schema-validator.js"; import { validateRecord } from "./schema-validator.js"; diff --git a/cli/selftune/utils/skill-log.ts b/cli/selftune/utils/skill-log.ts index e7d973bf..03f1a4b0 100644 --- a/cli/selftune/utils/skill-log.ts +++ b/cli/selftune/utils/skill-log.ts @@ -1,5 +1,6 @@ import { existsSync, mkdirSync, writeFileSync } from "node:fs"; import { dirname } from "node:path"; + import { REPAIRED_SKILL_LOG, REPAIRED_SKILL_SESSIONS_MARKER, SKILL_LOG } from "../constants.js"; import type { SkillUsageRecord } from "../types.js"; import { loadMarker, readJsonl, saveMarker } from "./jsonl.js"; diff --git a/cli/selftune/utils/transcript.ts b/cli/selftune/utils/transcript.ts index a5f08c4e..37fc5d12 100644 --- a/cli/selftune/utils/transcript.ts +++ b/cli/selftune/utils/transcript.ts @@ -4,6 +4,7 @@ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs"; import { basename, dirname } from "node:path"; + import { CLAUDE_CODE_PROJECTS_DIR } from "../constants.js"; import type { SessionTelemetryRecord, TranscriptMetrics } from "../types.js"; import { isActionableQueryText } from "./query-filter.js"; diff --git a/cli/selftune/utils/trigger-check.ts b/cli/selftune/utils/trigger-check.ts index da127f73..e9cad381 100644 --- a/cli/selftune/utils/trigger-check.ts +++ b/cli/selftune/utils/trigger-check.ts @@ -64,7 +64,7 @@ export function buildBatchTriggerCheckPrompt(description: string, queries: strin * 
original query order. Defaults to false for unparseable or missing lines. */ export function parseBatchTriggerResponse(response: string, queryCount: number): boolean[] { - const results: boolean[] = new Array(queryCount).fill(false); + const results: boolean[] = Array.from({ length: queryCount }, () => false); const lines = response.trim().split("\n"); for (const line of lines) { diff --git a/cli/selftune/workflows/skill-md-writer.ts b/cli/selftune/workflows/skill-md-writer.ts index 6c006727..db100339 100644 --- a/cli/selftune/workflows/skill-md-writer.ts +++ b/cli/selftune/workflows/skill-md-writer.ts @@ -36,7 +36,7 @@ export function parseWorkflowsSection(content: string): CodifiedWorkflow[] { // Find the end of the section (next ## heading or EOF) let sectionEnd = lines.length; for (let i = sectionStart; i < lines.length; i++) { - if (/^## /.test(lines[i]) && lines[i].trim() !== "## Workflows") { + if (lines[i].startsWith("## ") && lines[i].trim() !== "## Workflows") { sectionEnd = i; break; } @@ -155,7 +155,7 @@ export function appendWorkflow(content: string, workflow: CodifiedWorkflow): str // Find the end of the workflows section (next ## heading or EOF) let sectionEnd = lines.length; for (let i = sectionStart + 1; i < lines.length; i++) { - if (/^## /.test(lines[i])) { + if (lines[i].startsWith("## ")) { sectionEnd = i; break; } @@ -210,7 +210,7 @@ export function removeWorkflow(content: string, name: string): string { // Find the end of the workflows section let sectionEnd = lines.length; for (let i = sectionStart + 1; i < lines.length; i++) { - if (/^## /.test(lines[i])) { + if (lines[i].startsWith("## ")) { sectionEnd = i; break; } @@ -226,7 +226,7 @@ export function removeWorkflow(content: string, name: string): string { // Find the end of this subsection (next ### or ## or sectionEnd) subEnd = sectionEnd; for (let j = i + 1; j < sectionEnd; j++) { - if (/^### /.test(lines[j])) { + if (lines[j].startsWith("### ")) { subEnd = j; break; } @@ -255,7 +255,7 
@@ export function removeWorkflow(content: string, name: string): string { // Check if the workflows section is now empty const remaining = [...before.slice(sectionStart + 1), ...after.slice(0, sectionEnd - removeTo)]; - const hasRemainingWorkflows = remaining.some((l) => /^### /.test(l)); + const hasRemainingWorkflows = remaining.some((l) => l.startsWith("### ")); if (!hasRemainingWorkflows) { // Remove the entire ## Workflows section (heading + any blank lines) diff --git a/cli/selftune/workflows/workflows.ts b/cli/selftune/workflows/workflows.ts index a8c9c28a..e3836201 100644 --- a/cli/selftune/workflows/workflows.ts +++ b/cli/selftune/workflows/workflows.ts @@ -10,6 +10,7 @@ import { existsSync, readFileSync, writeFileSync } from "node:fs"; import { parseArgs } from "node:util"; + import { getDb } from "../localdb/db.js"; import { querySessionTelemetry, querySkillUsageRecords } from "../localdb/queries.js"; import type { diff --git a/docs/design-docs/alpha-remote-data-contract.md b/docs/design-docs/alpha-remote-data-contract.md index 9a092829..a64252d6 100644 --- a/docs/design-docs/alpha-remote-data-contract.md +++ b/docs/design-docs/alpha-remote-data-contract.md @@ -30,15 +30,15 @@ Alpha uploads target the existing selftune cloud API's V2 push endpoint (`POST / The `contribute/` system and the alpha upload pipeline serve different purposes but now share the same cloud API backend: -| Dimension | `contribute/` | Alpha upload | -|-----------|---------------|--------------| -| **Purpose** | Community sharing of anonymized eval data | Automatic telemetry for alpha cohort analysis | -| **Trigger** | Manual (`selftune contribute`) | Automatic (each `orchestrate` run) | -| **Transport** | HTTPS to cloud API | HTTPS to cloud API (`POST /api/v1/push`) | -| **Storage** | Neon Postgres (canonical tables) | Neon Postgres (canonical tables) | -| **Consent model** | Per-invocation confirmation | Enrollment flag in config (`config.alpha.enrolled`) + API key | -| **Data 
granularity** | Skill-level bundles with eval entries | Session-level, invocation-level, evolution-level V2 canonical records | -| **Privacy level** | Conservative or aggressive sanitization | Explicit alpha consent for raw prompt/query text plus structured telemetry | +| Dimension | `contribute/` | Alpha upload | +| -------------------- | ----------------------------------------- | -------------------------------------------------------------------------- | +| **Purpose** | Community sharing of anonymized eval data | Automatic telemetry for alpha cohort analysis | +| **Trigger** | Manual (`selftune contribute`) | Automatic (each `orchestrate` run) | +| **Transport** | HTTPS to cloud API | HTTPS to cloud API (`POST /api/v1/push`) | +| **Storage** | Neon Postgres (canonical tables) | Neon Postgres (canonical tables) | +| **Consent model** | Per-invocation confirmation | Enrollment flag in config (`config.alpha.enrolled`) + API key | +| **Data granularity** | Skill-level bundles with eval entries | Session-level, invocation-level, evolution-level V2 canonical records | +| **Privacy level** | Conservative or aggressive sanitization | Explicit alpha consent for raw prompt/query text plus structured telemetry | Both systems target the same cloud API, but alpha upload is automatic (when enrolled and an API key is configured) while contribute requires manual invocation and confirmation. 
@@ -113,14 +113,14 @@ All upload payloads use `schema_version: "2.0"` and contain canonical records th The V2 push payload contains typed canonical records: -| Record type | Description | -|-------------|-------------| -| `sessions` | Session summaries with platform, model, timing, and skill trigger metadata | -| `prompts` | User prompt/query records with raw text (alpha consent required) | -| `skill_invocations` | Skill trigger/miss records with confidence, mode, and query context | -| `execution_facts` | Tool usage, error counts, and execution metadata (deterministic `execution_fact_id` generated during staging for records that lack one) | -| `evolution_evidence` | Evolution proposal outcomes, pass rate changes, deploy/rollback status (deterministic `evidence_id` generated during staging) | -| `orchestrate_runs` | Orchestrate run reports with sync/evolve/watch phase summaries | +| Record type | Description | +| -------------------- | --------------------------------------------------------------------------------------------------------------------------------------- | +| `sessions` | Session summaries with platform, model, timing, and skill trigger metadata | +| `prompts` | User prompt/query records with raw text (alpha consent required) | +| `skill_invocations` | Skill trigger/miss records with confidence, mode, and query context | +| `execution_facts` | Tool usage, error counts, and execution metadata (deterministic `execution_fact_id` generated during staging for records that lack one) | +| `evolution_evidence` | Evolution proposal outcomes, pass rate changes, deploy/rollback status (deterministic `evidence_id` generated during staging) | +| `orchestrate_runs` | Orchestrate run reports with sync/evolve/watch phase summaries | ### Payload envelope @@ -164,14 +164,14 @@ The cloud API stores every push request in a `raw_pushes` table before normalizi The cloud API returns standard HTTP status codes: -| Status | Meaning | Client behavior | 
-|--------|---------|-----------------| -| `201 Created` | Records accepted and stored | Mark queue item as `sent` | -| `409 Conflict` | Duplicate records (already uploaded) | Treat as success, mark `sent` | -| `429 Too Many Requests` | Rate limited | Retryable — increment attempts, apply backoff | -| `401 Unauthorized` | Invalid or missing API key | Non-retryable — mark `failed`, log auth error | -| `403 Forbidden` | Key valid but user not authorized | Non-retryable — mark `failed`, log auth error | -| `5xx` | Server error | Retryable — increment attempts, apply backoff | +| Status | Meaning | Client behavior | +| ----------------------- | ------------------------------------ | --------------------------------------------- | +| `201 Created` | Records accepted and stored | Mark queue item as `sent` | +| `409 Conflict` | Duplicate records (already uploaded) | Treat as success, mark `sent` | +| `429 Too Many Requests` | Rate limited | Retryable — increment attempts, apply backoff | +| `401 Unauthorized` | Invalid or missing API key | Non-retryable — mark `failed`, log auth error | +| `403 Forbidden` | Key valid but user not authorized | Non-retryable — mark `failed`, log auth error | +| `5xx` | Server error | Retryable — increment attempts, apply backoff | --- @@ -193,6 +193,7 @@ Uploads happen at two touchpoints: - **Failure isolation.** If the cloud API is unreachable, the upload fails silently and retries next cycle. No impact on local selftune operation. **What NOT to do:** + - Do not upload from hooks (too latency-sensitive, runs in the critical path of user prompts). - Do not upload from the dashboard server (it is a read-only query surface). - Do not upload on every SQLite write (too frequent, creates thundering herd for multi-skill users). 
@@ -241,12 +242,12 @@ CREATE INDEX idx_upload_queue_created ON upload_queue(created_at); When retrying failed items within a single flush cycle: | Attempt | Delay before retry | -|---------|-------------------| -| 1 | 1 second | -| 2 | 2 seconds | -| 3 | 4 seconds | -| 4 | 8 seconds | -| 5 | 16 seconds | +| ------- | ------------------ | +| 1 | 1 second | +| 2 | 2 seconds | +| 3 | 4 seconds | +| 4 | 8 seconds | +| 5 | 16 seconds | After 5 failed attempts, the queue item stays at `status = 'failed'` and is not retried automatically. A future `selftune alpha retry` command could reset failed items. @@ -286,6 +287,7 @@ The cloud API validates every upload: ### Future: data deletion A future `selftune alpha delete-data` command will: + - Call a cloud API endpoint that deletes all records for the user's account. - Remove the `alpha` config block locally. - Confirm deletion to the agent. @@ -300,15 +302,15 @@ This aligns with the principle that alpha enrollment is fully reversible. The alpha pipeline uploads only the fields needed for alpha analysis, but it does include raw query text for explicitly consented users: -| Data category | What is uploaded | What is NOT uploaded | -|---------------|-----------------|---------------------| -| Queries | Raw query text (in `raw_source_ref.metadata`) | Full transcript bodies outside the captured prompt/query text | -| Workspace paths | Workspace path (in V2 canonical records) | N/A | -| File contents | Nothing | Nothing | -| Conversation text | Prompt/query text only | Full conversation transcripts | -| Code | Nothing | Nothing | -| File paths | Only if the user typed them into prompt/query text | Structured file-path fields | -| Session IDs | Session ID (opaque UUID) | N/A | +| Data category | What is uploaded | What is NOT uploaded | +| ----------------- | -------------------------------------------------- | ------------------------------------------------------------- | +| Queries | Raw query text (in 
`raw_source_ref.metadata`) | Full transcript bodies outside the captured prompt/query text | +| Workspace paths | Workspace path (in V2 canonical records) | N/A | +| File contents | Nothing | Nothing | +| Conversation text | Prompt/query text only | Full conversation transcripts | +| Code | Nothing | Nothing | +| File paths | Only if the user typed them into prompt/query text | Structured file-path fields | +| Session IDs | Session ID (opaque UUID) | N/A | ### What is explicitly excluded diff --git a/docs/design-docs/composability-v2.md b/docs/design-docs/composability-v2.md index ef6df1a1..8f22b52c 100644 --- a/docs/design-docs/composability-v2.md +++ b/docs/design-docs/composability-v2.md @@ -12,6 +12,7 @@ The current `eval composability` command only answers one question: "Do these skills conflict?" It computes a `conflict_score` based on error rate increases when skills co-occur. This is useful but limited. Users need answers to three more questions: + 1. "Do these skills work **better** together?" (synergy detection) 2. "Which skills are **always** used together?" (workflow candidates) 3. "In what **order** are they used?" (sequence detection) @@ -39,6 +40,7 @@ interface CoOccurrencePair { ``` Limitations: + - Only detects negatives (conflicts), never positives (synergies) - Only analyzes pairs, not sequences of 3+ - No ordering information — treats {A, B} same as {B, A} @@ -105,7 +107,7 @@ export function analyzeComposabilityV2( usage: SkillUsageRecord[], options?: { window?: number; - minOccurrences?: number; // default: 3 + minOccurrences?: number; // default: 3 }, ): ComposabilityReportV2; ``` @@ -130,7 +132,7 @@ Note: this is the **inverse** of `conflict_score`. 
A high conflict_score (bad) m The existing `conflict_detected` field remains for backwards compatibility: ```typescript -conflict_detected = synergy_score < -0.3 // same threshold, just inverted +conflict_detected = synergy_score < -0.3; // same threshold, just inverted ``` ### Sequence Detection @@ -229,13 +231,13 @@ The workflow documentation for the `eval composability` command should be update 1. New `--min-occurrences` flag 2. Synergy score interpretation table: -| Synergy Score | Interpretation | -|--------------|---------------| -| +0.6 to +1.0 | Strong synergy — skills work much better together | -| +0.3 to +0.6 | Moderate synergy — workflow candidate | -| -0.1 to +0.3 | No significant interaction | -| -0.3 to -0.1 | Minor friction — monitor | -| -1.0 to -0.3 | Conflict — skills interfere | +| Synergy Score | Interpretation | +| ------------- | ------------------------------------------------- | +| +0.6 to +1.0 | Strong synergy — skills work much better together | +| +0.3 to +0.6 | Moderate synergy — workflow candidate | +| -0.1 to +0.3 | No significant interaction | +| -0.3 to -0.1 | Minor friction — monitor | +| -1.0 to -0.3 | Conflict — skills interfere | 3. New "Detected Sequences" section in output 4. New "Workflow Candidates" section with actionable suggestions diff --git a/docs/design-docs/evolution-pipeline.md b/docs/design-docs/evolution-pipeline.md index 7df92049..d6c36850 100644 --- a/docs/design-docs/evolution-pipeline.md +++ b/docs/design-docs/evolution-pipeline.md @@ -169,25 +169,26 @@ terminal failure for that candidate. 
### CLI Flags -| Flag | Default | Description | -|------|---------|-------------| -| `--pareto` | `true` | Enable Pareto multi-candidate selection | -| `--candidates` | `3` | Number of candidate proposals to generate (max 5) | -| `--dry-run` | `false` | Preview proposals without deploying | -| `--with-baseline` | `false` | Measure baseline lift before deploying; gate on lift > 0.05 | -| `--token-efficiency` | `false` | Compute token efficiency scores; adds 5th Pareto dimension | -| `--validation-model` | `haiku` | Model for trigger-check validation calls | -| `--proposal-model` | (agent default) | Model for proposal generation LLM calls | -| `--cheap-loop` | `false` | Use haiku for proposal/validation, sonnet for final gate | -| `--gate-model` | (none; `sonnet` when `--cheap-loop`) | Model for final gate validation before deploy | +| Flag | Default | Description | +| -------------------- | ------------------------------------ | ----------------------------------------------------------- | +| `--pareto` | `true` | Enable Pareto multi-candidate selection | +| `--candidates` | `3` | Number of candidate proposals to generate (max 5) | +| `--dry-run` | `false` | Preview proposals without deploying | +| `--with-baseline` | `false` | Measure baseline lift before deploying; gate on lift > 0.05 | +| `--token-efficiency` | `false` | Compute token efficiency scores; adds 5th Pareto dimension | +| `--validation-model` | `haiku` | Model for trigger-check validation calls | +| `--proposal-model` | (agent default) | Model for proposal generation LLM calls | +| `--cheap-loop` | `false` | Use haiku for proposal/validation, sonnet for final gate | +| `--gate-model` | (none; `sonnet` when `--cheap-loop`) | Model for final gate validation before deploy | ### Batch Trigger Validation -Trigger checks are batched (10 queries per LLM call by default) via `validateProposalBatched()`. This reduces LLM calls from 2N to ~2*(N/10). 
The sequential `validateProposalSequential()` is kept for backward compatibility. +Trigger checks are batched (10 queries per LLM call by default) via `validateProposalBatched()`. This reduces LLM calls from 2N to ~2\*(N/10). The sequential `validateProposalSequential()` is kept for backward compatibility. ### Cheap-Loop Mode When `--cheap-loop` is enabled: + 1. `proposalModel` defaults to `haiku` 2. `validationModel` defaults to `haiku` 3. `gateModel` defaults to `sonnet` @@ -224,13 +225,13 @@ Pure function that evaluates whether the retry loop should stop: Every state change is recorded to `~/.claude/evolution_audit_log.jsonl`: -| Action | When | -|--------|------| -| `created` | Proposal generated | -| `validated` | Validation completed | -| `rejected` | Confidence too low or validation failed | -| `deployed` | SKILL.md updated | -| `rolled_back` | Reverted to previous description | +| Action | When | +| ------------- | --------------------------------------- | +| `created` | Proposal generated | +| `validated` | Validation completed | +| `rejected` | Confidence too low or validation failed | +| `deployed` | SKILL.md updated | +| `rolled_back` | Reverted to previous description | Each entry includes: `timestamp`, `proposal_id`, `action`, `details`, optional `eval_snapshot`. 
@@ -240,11 +241,11 @@ Extends evolution beyond descriptions to routing tables and complete skill bodie ### Evolution Targets -| Target | What changes | Generator | Validator | -|--------|-------------|-----------|-----------| -| `description` | Text between `#` and first `##` | `propose-description.ts` | `validate-proposal.ts` | -| `routing_table` | `## Workflow Routing` table | `propose-routing.ts` | `validate-routing.ts` | -| `full_body` | Entire body below frontmatter | `propose-body.ts` | `validate-body.ts` | +| Target | What changes | Generator | Validator | +| --------------- | ------------------------------- | ------------------------ | ---------------------- | +| `description` | Text between `#` and first `##` | `propose-description.ts` | `validate-proposal.ts` | +| `routing_table` | `## Workflow Routing` table | `propose-routing.ts` | `validate-routing.ts` | +| `full_body` | Entire body below frontmatter | `propose-body.ts` | `validate-body.ts` | ### 3-Gate Validation @@ -309,27 +310,27 @@ Imports external evaluation tasks from the SkillsBench corpus: ## Files -| File | Responsibility | -|------|---------------| -| `grading/pre-gates.ts` | Deterministic pre-gate checks before LLM grading | -| `evolution/extract-patterns.ts` | Cluster missed queries into failure patterns (with optional feedback attachment) | -| `evolution/propose-description.ts` | LLM-based description improvement (single + multi-candidate) | -| `evolution/validate-proposal.ts` | Before/after eval set validation (with cached mode) | -| `evolution/pareto.ts` | Pareto frontier computation, candidate selection, token efficiency | -| `evolution/deploy-proposal.ts` | SKILL.md update, backup, PR creation, section parsing | -| `evolution/evolve.ts` | Description orchestrator with retry loop (standard + Pareto paths) | -| `evolution/propose-routing.ts` | LLM-based routing table proposal generation | -| `evolution/validate-routing.ts` | Routing table structural + trigger validation | -| 
`evolution/propose-body.ts` | Teacher LLM full body generation | -| `evolution/validate-body.ts` | 3-gate body validation (structural + trigger + quality) | -| `evolution/refine-body.ts` | Iterative body refinement from failure feedback | -| `evolution/evolve-body.ts` | Body/routing evolution orchestrator | -| `evolution/rollback.ts` | Revert to pre-evolution description | -| `evolution/stopping-criteria.ts` | Loop termination conditions | -| `evolution/audit.ts` | Append/read audit trail entries | -| `eval/baseline.ts` | No-skill baseline comparison and lift measurement | -| `eval/unit-test.ts` | Skill unit test runner | -| `eval/generate-unit-tests.ts` | Unit test auto-generation from skill content | -| `eval/composability.ts` | Multi-skill co-occurrence conflict detection | -| `eval/import-skillsbench.ts` | SkillsBench task corpus importer | -| `utils/trigger-check.ts` | Shared trigger-check prompt builder and parser | +| File | Responsibility | +| ---------------------------------- | -------------------------------------------------------------------------------- | +| `grading/pre-gates.ts` | Deterministic pre-gate checks before LLM grading | +| `evolution/extract-patterns.ts` | Cluster missed queries into failure patterns (with optional feedback attachment) | +| `evolution/propose-description.ts` | LLM-based description improvement (single + multi-candidate) | +| `evolution/validate-proposal.ts` | Before/after eval set validation (with cached mode) | +| `evolution/pareto.ts` | Pareto frontier computation, candidate selection, token efficiency | +| `evolution/deploy-proposal.ts` | SKILL.md update, backup, PR creation, section parsing | +| `evolution/evolve.ts` | Description orchestrator with retry loop (standard + Pareto paths) | +| `evolution/propose-routing.ts` | LLM-based routing table proposal generation | +| `evolution/validate-routing.ts` | Routing table structural + trigger validation | +| `evolution/propose-body.ts` | Teacher LLM full body generation | 
+| `evolution/validate-body.ts` | 3-gate body validation (structural + trigger + quality) | +| `evolution/refine-body.ts` | Iterative body refinement from failure feedback | +| `evolution/evolve-body.ts` | Body/routing evolution orchestrator | +| `evolution/rollback.ts` | Revert to pre-evolution description | +| `evolution/stopping-criteria.ts` | Loop termination conditions | +| `evolution/audit.ts` | Append/read audit trail entries | +| `eval/baseline.ts` | No-skill baseline comparison and lift measurement | +| `eval/unit-test.ts` | Skill unit test runner | +| `eval/generate-unit-tests.ts` | Unit test auto-generation from skill content | +| `eval/composability.ts` | Multi-skill co-occurrence conflict detection | +| `eval/import-skillsbench.ts` | SkillsBench task corpus importer | +| `utils/trigger-check.ts` | Shared trigger-check prompt builder and parser | diff --git a/docs/design-docs/index.md b/docs/design-docs/index.md index 188f1e8e..4467a865 100644 --- a/docs/design-docs/index.md +++ b/docs/design-docs/index.md @@ -4,24 +4,25 @@ Registry of all design documents with verification status. 
-| Document | Status | Last Verified | Owner | -|----------|--------|---------------|-------| -| system-overview.md | Current | 2026-03-16 | Team | -| core-beliefs.md | Current | 2026-02-28 | Team | -| evolution-pipeline.md | Current | 2026-03-04 | Team | -| monitoring-pipeline.md | Current | 2026-03-01 | Team | -| ../exec-plans/active/agent-first-skill-restructure.md | Current | 2026-02-28 | Team | -| sandbox-test-harness.md | Current | 2026-03-01 | Team | -| sandbox-architecture.md | Current | 2026-03-02 | Team | -| sandbox-claude-code.md | Current | 2026-03-02 | Team | -| live-dashboard-sse.md | Current | 2026-03-17 | Team | -| sqlite-first-migration.md | Current | 2026-03-17 | Team | -| ../integration-guide.md | Current | 2026-03-01 | Team | -| alpha-remote-data-contract.md | Current | 2026-03-18 | Team | +| Document | Status | Last Verified | Owner | +| ----------------------------------------------------- | ------- | ------------- | ----- | +| system-overview.md | Current | 2026-03-16 | Team | +| core-beliefs.md | Current | 2026-02-28 | Team | +| evolution-pipeline.md | Current | 2026-03-04 | Team | +| monitoring-pipeline.md | Current | 2026-03-01 | Team | +| ../exec-plans/active/agent-first-skill-restructure.md | Current | 2026-02-28 | Team | +| sandbox-test-harness.md | Current | 2026-03-01 | Team | +| sandbox-architecture.md | Current | 2026-03-02 | Team | +| sandbox-claude-code.md | Current | 2026-03-02 | Team | +| live-dashboard-sse.md | Current | 2026-03-17 | Team | +| sqlite-first-migration.md | Current | 2026-03-17 | Team | +| ../integration-guide.md | Current | 2026-03-01 | Team | +| alpha-remote-data-contract.md | Current | 2026-03-18 | Team | ## Verification Schedule Design docs are verified against the actual codebase on a recurring cadence: + - **Weekly**: Active design docs for in-progress features - **Monthly**: All design docs - **On change**: When related code is significantly modified diff --git a/docs/design-docs/live-dashboard-sse.md 
b/docs/design-docs/live-dashboard-sse.md index 36f44a95..285e159d 100644 --- a/docs/design-docs/live-dashboard-sse.md +++ b/docs/design-docs/live-dashboard-sse.md @@ -75,6 +75,7 @@ The hook is mounted once in `DashboardShell` (the root layout component). ### Polling as Fallback All React Query hooks retain `refetchInterval` but relaxed to 60s (was 15–30s). This serves as a safety net if: + - SSE connection drops and `EventSource` is reconnecting - The server restarts and watchers haven't initialized yet - The browser doesn't support SSE (unlikely but defensive) @@ -83,28 +84,28 @@ All React Query hooks retain `refetchInterval` but relaxed to 60s (was 15–30s) ## Target Latency Budget -| Stage | Time | -|-------|------| -| Hook writes SQLite | ~5ms | -| `fs.watchFile` poll interval | 500ms | -| Debounce window | 500ms | -| SSE broadcast + network | ~10ms | -| React Query invalidation + fetch | ~100ms | -| **Total** | **~1100ms** | +| Stage | Time | +| -------------------------------- | ----------- | +| Hook writes SQLite | ~5ms | +| `fs.watchFile` poll interval | 500ms | +| Debounce window | 500ms | +| SSE broadcast + network | ~10ms | +| React Query invalidation + fetch | ~100ms | +| **Total** | **~1100ms** | After the WAL cutover lands, new data should appear in the dashboard within ~1 second of a hook writing to SQLite. 
## Files Changed -| File | Change | -|------|--------| -| `cli/selftune/dashboard-server.ts` | SSE endpoint, SQLite WAL watcher, broadcast, cleanup | -| `apps/local-dashboard/src/hooks/useSSE.ts` | New hook — EventSource + query invalidation | -| `apps/local-dashboard/src/App.tsx` | Mount `useSSE` in `DashboardShell` | -| `apps/local-dashboard/src/hooks/useOverview.ts` | Polling 15s → 60s fallback, staleTime 10s → 5s | -| `apps/local-dashboard/src/hooks/useSkillReport.ts` | Polling 30s → 60s fallback, staleTime 30s → 5s | -| `apps/local-dashboard/src/hooks/useDoctor.ts` | Polling 30s → 60s fallback, staleTime 20s → 5s | -| `apps/local-dashboard/src/hooks/useOrchestrateRuns.ts` | Polling 30s → 60s fallback, staleTime 15s → 5s | +| File | Change | +| ------------------------------------------------------ | ---------------------------------------------------- | +| `cli/selftune/dashboard-server.ts` | SSE endpoint, SQLite WAL watcher, broadcast, cleanup | +| `apps/local-dashboard/src/hooks/useSSE.ts` | New hook — EventSource + query invalidation | +| `apps/local-dashboard/src/App.tsx` | Mount `useSSE` in `DashboardShell` | +| `apps/local-dashboard/src/hooks/useOverview.ts` | Polling 15s → 60s fallback, staleTime 10s → 5s | +| `apps/local-dashboard/src/hooks/useSkillReport.ts` | Polling 30s → 60s fallback, staleTime 30s → 5s | +| `apps/local-dashboard/src/hooks/useDoctor.ts` | Polling 30s → 60s fallback, staleTime 20s → 5s | +| `apps/local-dashboard/src/hooks/useOrchestrateRuns.ts` | Polling 30s → 60s fallback, staleTime 15s → 5s | ## Design Decisions diff --git a/docs/design-docs/monitoring-pipeline.md b/docs/design-docs/monitoring-pipeline.md index c7fe91e0..6789a4a3 100644 --- a/docs/design-docs/monitoring-pipeline.md +++ b/docs/design-docs/monitoring-pipeline.md @@ -19,6 +19,7 @@ Read Logs → Window to Recent Sessions → Compute Snapshot → Detect Regressi Pure function that takes raw log records and produces a `MonitoringSnapshot`. 
No side effects, fully deterministic for a given input. **Inputs:** + - `skillName` — skill to monitor - `telemetry` — session telemetry records - `skillRecords` — skill usage records @@ -28,6 +29,7 @@ Pure function that takes raw log records and produces a `MonitoringSnapshot`. No - `regressionThreshold` — drop below baseline minus this triggers regression **Algorithm:** + 1. Window telemetry to last N sessions (by array order, assumed chronological) 2. Filter skill records by skill name 3. Apply session ID windowing (if telemetry overlaps with skill/query records) @@ -73,35 +75,35 @@ This avoids boundary issues like `0.8 - 0.1 = 0.7000000000000001`. ## MonitoringSnapshot Schema -| Field | Type | Description | -|-------|------|-------------| -| `timestamp` | string | ISO 8601 | -| `skill_name` | string | Monitored skill | -| `window_sessions` | number | Sessions in window | -| `pass_rate` | number | Current pass rate | -| `false_negative_rate` | number | Miss rate within skill checks | -| `by_invocation_type` | object | Breakdown by explicit/implicit/contextual/negative | -| `regression_detected` | boolean | Whether pass rate dropped below threshold | -| `baseline_pass_rate` | number | From last deployed audit entry | +| Field | Type | Description | +| --------------------- | ------- | -------------------------------------------------- | +| `timestamp` | string | ISO 8601 | +| `skill_name` | string | Monitored skill | +| `window_sessions` | number | Sessions in window | +| `pass_rate` | number | Current pass rate | +| `false_negative_rate` | number | Miss rate within skill checks | +| `by_invocation_type` | object | Breakdown by explicit/implicit/contextual/negative | +| `regression_detected` | boolean | Whether pass rate dropped below threshold | +| `baseline_pass_rate` | number | From last deployed audit entry | ## WatchResult Schema -| Field | Type | Description | -|-------|------|-------------| -| `snapshot` | MonitoringSnapshot | Computed snapshot | -| 
`alert` | string or null | Regression alert message | -| `rolledBack` | boolean | Whether auto-rollback executed | -| `recommendation` | string | Human-readable next step | +| Field | Type | Description | +| ---------------- | ------------------ | ------------------------------ | +| `snapshot` | MonitoringSnapshot | Computed snapshot | +| `alert` | string or null | Regression alert message | +| `rolledBack` | boolean | Whether auto-rollback executed | +| `recommendation` | string | Human-readable next step | ## Reuse by Observability Surfaces The `computeMonitoringSnapshot` pure function is the shared backbone for all three observability surfaces introduced in v0.1.4: -| Surface | File | How it uses `computeMonitoringSnapshot` | -|---------|------|----------------------------------------| -| `selftune status` | `cli/selftune/status.ts` | Computes per-skill pass rate, regression status, and trend for the CLI summary | -| `selftune dashboard` | `cli/selftune/dashboard.ts` | Pre-computes per-skill snapshots embedded in the HTML as `computed.snapshots` | -| `selftune watch` | `cli/selftune/monitoring/watch.ts` | Original use case — post-deploy regression detection with auto-rollback | +| Surface | File | How it uses `computeMonitoringSnapshot` | +| -------------------- | ---------------------------------- | ------------------------------------------------------------------------------ | +| `selftune status` | `cli/selftune/status.ts` | Computes per-skill pass rate, regression status, and trend for the CLI summary | +| `selftune dashboard` | `cli/selftune/dashboard.ts` | Pre-computes per-skill snapshots embedded in the HTML as `computed.snapshots` | +| `selftune watch` | `cli/selftune/monitoring/watch.ts` | Original use case — post-deploy regression detection with auto-rollback | This reuse validates the pure-function design: no side effects, fully deterministic, injectable inputs. The same function serves CLI, HTML, and monitoring without any modifications. 
@@ -117,6 +119,6 @@ The monitoring pipeline feeds into the auto-activation system to close the loop ## Files -| File | Responsibility | -|------|---------------| +| File | Responsibility | +| --------------------- | ---------------------------------------------------------------------- | | `monitoring/watch.ts` | Snapshot computation, log reading, regression detection, auto-rollback | diff --git a/docs/design-docs/sandbox-architecture.md b/docs/design-docs/sandbox-architecture.md index e37f1d1e..7f4f2dde 100644 --- a/docs/design-docs/sandbox-architecture.md +++ b/docs/design-docs/sandbox-architecture.md @@ -48,25 +48,28 @@ tests/sandbox/fixtures/ Three skills with deliberately different health profiles provide test coverage: -| Skill | Profile | Trigger Rate | Purpose | -|-------|---------|-------------|---------| -| `find-skills` | Healthy | 6/30 queries (20%) | Tests normal operation, deployed evolution | -| `frontend-design` | Sick | 0/30 queries (0%) | Tests undertrigger detection, evolution candidate | -| `ai-image-generation` | New | 1/30 queries (3%) | Tests minimal data handling | +| Skill | Profile | Trigger Rate | Purpose | +| --------------------- | ------- | ------------------ | ------------------------------------------------- | +| `find-skills` | Healthy | 6/30 queries (20%) | Tests normal operation, deployed evolution | +| `frontend-design` | Sick | 0/30 queries (0%) | Tests undertrigger detection, evolution candidate | +| `ai-image-generation` | New | 1/30 queries (3%) | Tests minimal data handling | **Data volume:** 15 sessions, 30 queries, 7 skill usage records, 3 evolution audit entries. ## Key Design Decisions ### 1. HOME Env Var Redirection + All selftune paths go through `homedir()` in `constants.ts`. Setting `HOME=/tmp/sandbox-*` redirects everything without modifying production code. ### 2. 
Two-Layer Architecture + - Layer 1 is free, fast (~400ms), and runs in CI - Layer 2 costs tokens and requires Docker, reserved for pre-release validation - Both share the same fixture data ### 3. Result Recording + Every test run saves a JSON report to `tests/sandbox/results/` with command, exit code, stdout, stderr, duration, and pass/fail. This creates a historical record of sandbox health. ## Makefile Targets @@ -74,6 +77,7 @@ Every test run saves a JSON report to `tests/sandbox/results/` with command, exi ```bash # Layer 1 (local, free, fast) make sandbox # Claude Code (default, backward-compatible) +make sandbox-install # Empty-state install validation make sandbox-codex # Codex make sandbox-opencode # OpenCode make sandbox-all # All agents in sequence @@ -84,17 +88,21 @@ make sandbox-llm-codex # Codex make sandbox-llm-opencode # OpenCode # Utility -make sandbox-shell # Claude Code container +make sandbox-reset # Remove persisted Claude sandbox HOME +make sandbox-reset-state # Remove selftune state, keep Claude auth +make sandbox-shell # Provisioned Claude Code container +make sandbox-shell-empty # Blank Claude Code container +make sandbox-shell-empty-workspace # Blank container with selftune wired to /app make sandbox-shell-codex # Codex container ``` ## Per-Agent Design Docs -| Agent | Design Doc | Status | -|-------|-----------|--------| +| Agent | Design Doc | Status | +| ----------- | ------------------------------------------------ | ----------- | | Claude Code | [sandbox-claude-code.md](sandbox-claude-code.md) | Implemented | -| Codex | sandbox-codex.md | Planned | -| OpenCode | sandbox-opencode.md | Planned | +| Codex | sandbox-codex.md | Planned | +| OpenCode | sandbox-opencode.md | Planned | ## Future Work diff --git a/docs/design-docs/sandbox-claude-code.md b/docs/design-docs/sandbox-claude-code.md index 34244cf9..5b20e90f 100644 --- a/docs/design-docs/sandbox-claude-code.md +++ b/docs/design-docs/sandbox-claude-code.md @@ -16,18 +16,18 @@ Claude 
Code-specific sandbox configuration, tests, and Docker container. See [sa **What it tests:** -| Command | Expected Behavior | -|---------|-------------------| -| `doctor` | Config + logs validated, hooks detected in settings.json | -| `eval generate --skill find-skills` | 6 positives, 24 negatives generated | -| `eval generate --skill frontend-design` | 0 positives (correctly identifies undertriggering) | -| `status` | Colored table with per-skill health | -| `last` | Latest session insight with unmatched queries | -| `dashboard --export` | Standalone HTML with embedded data | -| `contribute --preview` | Sanitized contribution bundle | -| Hook: prompt-log | Record appended to all_queries_log.jsonl | -| Hook: skill-eval | `skill_invocation` appended to canonical log / SQLite | -| Hook: session-stop | Record appended to session_telemetry_log.jsonl | +| Command | Expected Behavior | +| --------------------------------------- | -------------------------------------------------------- | +| `doctor` | Config + logs validated, hooks detected in settings.json | +| `eval generate --skill find-skills` | 6 positives, 24 negatives generated | +| `eval generate --skill frontend-design` | 0 positives (correctly identifies undertriggering) | +| `status` | Colored table with per-skill health | +| `last` | Latest session insight with unmatched queries | +| `dashboard --export` | Standalone HTML with embedded data | +| `contribute --preview` | Sanitized contribution bundle | +| Hook: prompt-log | Record appended to all_queries_log.jsonl | +| Hook: skill-eval | `skill_invocation` appended to canonical log / SQLite | +| Hook: session-stop | Record appended to session_telemetry_log.jsonl | **Performance:** 10 tests in ~400ms. 
@@ -51,11 +51,11 @@ This complements `make sandbox`, which is still a seeded smoke test against fixt **What it tests:** -| Command | Expected Behavior | -|---------|-------------------| -| `grade --skill find-skills` | LLM evaluates session against expectations | -| `evolve --skill frontend-design --dry-run` | LLM proposes improved description | -| `watch --skill find-skills` | Monitoring snapshot computed (no regression for healthy skill) | +| Command | Expected Behavior | +| ------------------------------------------ | -------------------------------------------------------------- | +| `grade --skill find-skills` | LLM evaluates session against expectations | +| `evolve --skill frontend-design --dry-run` | LLM proposes improved description | +| `watch --skill find-skills` | Monitoring snapshot computed (no regression for healthy skill) | ### Test 1: Grade (`find-skills`, session-001) @@ -85,23 +85,23 @@ This correctly detects the regression scenario encoded in the fixture data (~30m ### What the tests validate -| Concern | How it's validated | -|---------|-------------------| -| LLM integration | `grade` calls `claude -p`, parses response, produces structured output | -| CLI argument parsing | All commands receive correct flags and produce valid JSON | -| File I/O in sandbox | Commands read from and write to the sandboxed HOME directory | -| Evolution pipeline | `evolve` reads skill files, analyzes logs, returns valid result | -| Monitoring math | `watch` computes pass rates and detects regressions from log data | +| Concern | How it's validated | +| -------------------- | ---------------------------------------------------------------------- | +| LLM integration | `grade` calls `claude -p`, parses response, produces structured output | +| CLI argument parsing | All commands receive correct flags and produce valid JSON | +| File I/O in sandbox | Commands read from and write to the sandboxed HOME directory | +| Evolution pipeline | `evolve` reads skill files, 
analyzes logs, returns valid result | +| Monitoring math | `watch` computes pass rates and detects regressions from log data | ### What the tests don't cover -| Gap | Why | -|-----|-----| -| All 3 skills graded | Only `find-skills` session-001 is graded (cost control) | -| Actual skill rewriting | `evolve --dry-run` never modifies SKILL.md | -| Rollback after regression | `watch` detects regression but doesn't test `evolve rollback` | -| Multi-session grading | Only 1 of 15 sessions is graded | -| `ai-image-generation` in Layer 2 | Only exercised in Layer 1 via `eval generate` | +| Gap | Why | +| -------------------------------- | ------------------------------------------------------------- | +| All 3 skills graded | Only `find-skills` session-001 is graded (cost control) | +| Actual skill rewriting | `evolve --dry-run` never modifies SKILL.md | +| Rollback after regression | `watch` detects regression but doesn't test `evolve rollback` | +| Multi-session grading | Only 1 of 15 sessions is graded | +| `ai-image-generation` in Layer 2 | Only exercised in Layer 1 via `eval generate` | These are candidates for future test expansion. 
@@ -114,8 +114,10 @@ make sandbox-install # Layer 2: First-time auth setup (one-time) make sandbox-reset # clear persisted Docker sandbox HOME if needed +make sandbox-reset-state # clear selftune state but keep Claude login make sandbox-shell # drop into provisioned container make sandbox-shell-empty # drop into blank "white room" container +make sandbox-shell-empty-workspace # blank container, but selftune linked to /app claude login # paste token, then exit # Layer 2: Run LLM tests (auth persists in Docker volume) @@ -125,9 +127,13 @@ make sandbox-llm make sandbox-shell # White-room manual onboarding -make sandbox-reset +make sandbox-reset-state make sandbox-shell-empty +# White-room shell using the current workspace code +make sandbox-reset-state +make sandbox-shell-empty-workspace + # Full check: lint + unit tests + sandbox make check ``` @@ -137,6 +143,15 @@ to install skills manually, then tell Claude "setup selftune" and observe the actual onboarding path. Use `sandbox-shell` when you want the preseeded fixture environment for repeatable functional checks. +Use `sandbox-shell-empty-workspace` when you want the same blank container state +but need Claude to run the selftune CLI and skill directly from the current +workspace at `/app` instead of from npm or a separately installed copy. + +Use `sandbox-reset-state` when you want to keep Claude Code authenticated but +clear selftune config, hooks, logs, and installed skills from the sandbox. +Use `sandbox-reset` only when you want to wipe the entire sandbox HOME, +including Claude auth. + **Auth options:** `claude login` inside the container (persists in Docker volume), `ANTHROPIC_API_KEY` in `.env.local`, or VS Code devcontainer. 
## Future Work diff --git a/docs/design-docs/sandbox-test-harness.md b/docs/design-docs/sandbox-test-harness.md index 61455ebc..6bb99831 100644 --- a/docs/design-docs/sandbox-test-harness.md +++ b/docs/design-docs/sandbox-test-harness.md @@ -23,18 +23,18 @@ selftune had 499 unit tests covering individual functions, but zero integration **What it tests:** -| Command | Expected Behavior | -|---------|-------------------| -| `doctor` | Config + logs validated, hooks detected in settings.json | -| `evals --skill find-skills` | 6 positives, 24 negatives generated | -| `evals --skill frontend-design` | 0 positives (correctly identifies undertriggering) | -| `status` | Colored table with per-skill health | -| `last` | Latest session insight with unmatched queries | -| `dashboard --export` | Standalone HTML with embedded data | -| `contribute --preview` | Sanitized contribution bundle | -| Hook: prompt-log | Record appended to all_queries_log.jsonl | -| Hook: skill-eval | Record appended to skill_usage_log.jsonl | -| Hook: session-stop | Record appended to session_telemetry_log.jsonl | +| Command | Expected Behavior | +| ------------------------------- | -------------------------------------------------------- | +| `doctor` | Config + logs validated, hooks detected in settings.json | +| `evals --skill find-skills` | 6 positives, 24 negatives generated | +| `evals --skill frontend-design` | 0 positives (correctly identifies undertriggering) | +| `status` | Colored table with per-skill health | +| `last` | Latest session insight with unmatched queries | +| `dashboard --export` | Standalone HTML with embedded data | +| `contribute --preview` | Sanitized contribution bundle | +| Hook: prompt-log | Record appended to all_queries_log.jsonl | +| Hook: skill-eval | Record appended to skill_usage_log.jsonl | +| Hook: session-stop | Record appended to session_telemetry_log.jsonl | **Performance:** 10 tests in ~400ms. 
@@ -46,11 +46,11 @@ selftune had 499 unit tests covering individual functions, but zero integration **What it tests:** -| Command | Expected Behavior | -|---------|-------------------| -| `grade --skill find-skills` | LLM evaluates session against expectations | -| `evolve --skill frontend-design --dry-run` | LLM proposes improved description | -| `watch --skill find-skills` | Monitoring snapshot computed (no regression for healthy skill) | +| Command | Expected Behavior | +| ------------------------------------------ | -------------------------------------------------------------- | +| `grade --skill find-skills` | LLM evaluates session against expectations | +| `evolve --skill frontend-design --dry-run` | LLM proposes improved description | +| `watch --skill find-skills` | Monitoring snapshot computed (no regression for healthy skill) | **Cost:** Uses existing Claude subscription — no per-call API charges. @@ -58,28 +58,32 @@ selftune had 499 unit tests covering individual functions, but zero integration Three skills with deliberately different health profiles: -| Skill | Profile | Trigger Rate | Purpose | -|-------|---------|-------------|---------| -| `find-skills` | Healthy | 6/30 queries (20%) | Tests normal operation, deployed evolution | -| `frontend-design` | Sick | 0/30 queries (0%) | Tests undertrigger detection, evolution candidate | -| `ai-image-generation` | New | 1/30 queries (3%) | Tests minimal data handling | +| Skill | Profile | Trigger Rate | Purpose | +| --------------------- | ------- | ------------------ | ------------------------------------------------- | +| `find-skills` | Healthy | 6/30 queries (20%) | Tests normal operation, deployed evolution | +| `frontend-design` | Sick | 0/30 queries (0%) | Tests undertrigger detection, evolution candidate | +| `ai-image-generation` | New | 1/30 queries (3%) | Tests minimal data handling | **Data volume:** 15 sessions, 30 queries, 7 skill usage records, 3 evolution audit entries. 
## Key Design Decisions ### 1. HOME Env Var Redirection + All selftune paths go through `homedir()` in `constants.ts`. Setting `HOME=/tmp/sandbox-*` redirects everything without modifying production code. ### 2. Two-Layer Architecture + - Layer 1 is free, fast (~400ms), and runs in CI - Layer 2 costs tokens and requires Docker, reserved for pre-release validation - Both share the same fixture data ### 3. Devcontainer-Based Isolation + Extends the official Claude Code devcontainer reference with firewall, Bun runtime, and sandbox HOME. Production code is unchanged and maintains zero dependencies. ### 4. Result Recording + Every test run saves a JSON report to `tests/sandbox/results/` with command, exit code, stdout, stderr, duration, and pass/fail. This creates a historical record of sandbox health. ## Running @@ -87,25 +91,39 @@ Every test run saves a JSON report to `tests/sandbox/results/` with command, exi ```bash # Layer 1: Local (free, fast) make sandbox +make sandbox-install # Layer 2: Devcontainer + LLM (uses existing Claude subscription) +make sandbox-reset +make sandbox-reset-state make sandbox-llm +make sandbox-shell +make sandbox-shell-empty +make sandbox-shell-empty-workspace # Full check: lint + unit tests + sandbox make check ``` +Use `make sandbox-shell-empty` for a true white-room onboarding shell. Use +`make sandbox-shell-empty-workspace` when you want the same blank state but need +Claude to execute the selftune CLI and skill directly from the current +workspace at `/app`. + +Use `make sandbox-reset-state` when you want to preserve Claude login but clear +the selftune-specific sandbox state before another onboarding run. 
+ ### Layer 1: OpenClaw Sandbox Tests **Added to** `tests/sandbox/run-sandbox.ts`: -| Test Name | Command | Verification | -|-----------|---------|-------------| -| `ingest openclaw` | `ingest openclaw --agents-dir ` | Exit 0 + openclaw records in logs | -| `ingest openclaw --dry-run` | `ingest openclaw --agents-dir --dry-run` | Exit 0 + no new log records | -| `ingest openclaw (idempotent)` | Run ingest twice | Second run: "0 not yet ingested" | -| `cron list` | `cron list` | Exit 0 + shows selftune-ingest | -| `cron setup --dry-run` | `cron setup --dry-run --tz UTC` | Exit 0 + shows [DRY RUN] | +| Test Name | Command | Verification | +| ------------------------------ | -------------------------------------------------- | --------------------------------- | +| `ingest openclaw` | `ingest openclaw --agents-dir ` | Exit 0 + openclaw records in logs | +| `ingest openclaw --dry-run` | `ingest openclaw --agents-dir --dry-run` | Exit 0 + no new log records | +| `ingest openclaw (idempotent)` | Run ingest twice | Second run: "0 not yet ingested" | +| `cron list` | `cron list` | Exit 0 + shows selftune-ingest | +| `cron setup --dry-run` | `cron setup --dry-run --tz UTC` | Exit 0 + shows [DRY RUN] | **Fixtures:** 5 sessions across 2 agents, 2 skills (Deploy, CodeReview), cron jobs. @@ -114,29 +132,30 @@ make check **Purpose:** Test selftune against a real OpenClaw gateway in Docker. 
**Architecture:** + - `openclaw-gateway` service: Real OpenClaw gateway with health check - `selftune-openclaw` service: Runs test orchestrator after gateway is healthy - Named Docker volumes persist data across container restarts **Tests:** -| Test Name | What It Does | Verification | -|-----------|-------------|-------------| -| `gateway-health` | Curl gateway /healthz | HTTP 200 | -| `ingest openclaw` | Run ingestion against gateway data | Exit 0 + log records | -| `cron setup --dry-run` | Register cron jobs (dry-run) | Exit 0 + dry-run output | -| `cron list` | List registered jobs | Exit 0 + shows jobs | -| `status` | Show skill health post-ingestion | Exit 0 + output | -| `doctor` | Run health checks | JSON with checks array | +| Test Name | What It Does | Verification | +| ---------------------- | ---------------------------------- | ----------------------- | +| `gateway-health` | Curl gateway /healthz | HTTP 200 | +| `ingest openclaw` | Run ingestion against gateway data | Exit 0 + log records | +| `cron setup --dry-run` | Register cron jobs (dry-run) | Exit 0 + dry-run output | +| `cron list` | List registered jobs | Exit 0 + shows jobs | +| `status` | Show skill health post-ingestion | Exit 0 + output | +| `doctor` | Run health checks | JSON with checks array | **Persistence:** -| What | Volume | Persists? | -|------|--------|-----------| -| OpenClaw gateway data | `openclaw-config` | Yes | -| Selftune log data | `selftune-data` | Yes | -| Selftune config/markers | `selftune-config` | Yes | -| Test result reports | `selftune-results` | Yes | +| What | Volume | Persists? 
| +| ----------------------- | ------------------ | --------- | +| OpenClaw gateway data | `openclaw-config` | Yes | +| Selftune log data | `selftune-data` | Yes | +| Selftune config/markers | `selftune-config` | Yes | +| Test result reports | `selftune-results` | Yes | **Running:** diff --git a/docs/design-docs/sqlite-first-migration.md b/docs/design-docs/sqlite-first-migration.md index 11a807ce..181aeea0 100644 --- a/docs/design-docs/sqlite-first-migration.md +++ b/docs/design-docs/sqlite-first-migration.md @@ -13,6 +13,7 @@ Phase 2 is complete. Phase 3 is in progress. ## Problem JSONL-as-source-of-truth caused: + - **9.5s dashboard load times** — materializer re-reading 370MB of JSONL on every request cycle - **7-file change propagation** on schema changes (JSONL write, schema def, materializer, types, dashboard contract, route handler, tests) - **Dual data paths** (JSONL tables vs SQLite tables) causing wrong-table bugs when queries hit stale materialized data @@ -58,34 +59,34 @@ Hook → SQLite INSERT (via direct-write.ts) → WAL watcher → SSE broadcast ## Files Created -| File | Purpose | -|------|---------| +| File | Purpose | +| -------------------------------------- | -------------------------------------------- | | `cli/selftune/localdb/direct-write.ts` | Fail-open insert functions for all 11 tables | -| `cli/selftune/export.ts` | SQLite → JSONL export command | -| `cli/selftune/routes/*.ts` | 7 extracted route handlers + index | +| `cli/selftune/export.ts` | SQLite → JSONL export command | +| `cli/selftune/routes/*.ts` | 7 extracted route handlers + index | ## Files Modified 78 files changed, 2033 insertions, 1533 deletions. 
Key areas: -| Area | Files | -|------|-------| -| Hooks | All hook handlers (`hooks/*.ts`) — dual-write path | -| Ingestors | All platform adapters — dual-write path | -| Evolution | `evolution/*.ts` — read from SQLite, write via direct-write | -| Orchestrate + Grading | `orchestrate.ts`, `grading/*.ts` — SQLite reads | -| Dashboard | `dashboard-server.ts`, SQLite-backed routes, transitional SSE invalidation | -| CI | Workflow updated for new test structure | +| Area | Files | +| --------------------- | -------------------------------------------------------------------------- | +| Hooks | All hook handlers (`hooks/*.ts`) — dual-write path | +| Ingestors | All platform adapters — dual-write path | +| Evolution | `evolution/*.ts` — read from SQLite, write via direct-write | +| Orchestrate + Grading | `orchestrate.ts`, `grading/*.ts` — SQLite reads | +| Dashboard | `dashboard-server.ts`, SQLite-backed routes, transitional SSE invalidation | +| CI | Workflow updated for new test structure | ## Impact -| Metric | Before | After | -|--------|--------|-------| -| Dashboard load (first call) | 9.5s | 86ms | -| Dashboard load (subsequent) | ~2s (TTL hit) | 15ms | -| Data latency (hook → dashboard) | 15–30s | <1s (WAL-only SSE shipped) | -| Schema change propagation | 7 files | 4 files | -| Test delta | baseline | +2 passing, -2 failures | +| Metric | Before | After | +| ------------------------------- | ------------- | -------------------------- | +| Dashboard load (first call) | 9.5s | 86ms | +| Dashboard load (subsequent) | ~2s (TTL hit) | 15ms | +| Data latency (hook → dashboard) | 15–30s | <1s (WAL-only SSE shipped) | +| Schema change propagation | 7 files | 4 files | +| Test delta | baseline | +2 passing, -2 failures | ## Limitations diff --git a/docs/design-docs/system-overview.md b/docs/design-docs/system-overview.md index 520a2864..12f8b5bc 100644 --- a/docs/design-docs/system-overview.md +++ b/docs/design-docs/system-overview.md @@ -155,17 +155,17 @@ The dashboard 
SPA consumes shared presentational components from `packages/ui/` ## The Main Local Artifacts -| Artifact | Role | -|---|---| -| `~/.claude/*.jsonl` | Shared append-only logs for telemetry, queries, repaired usage, and evolution audit | -| `~/.claude/orchestrate_runs.jsonl` | Persisted orchestrate run reports for CLI and dashboard inspection | -| `~/.claude/improvement_signals.jsonl` | Real-time improvement signals from user corrections and explicit skill requests | -| `~/.claude/.orchestrate.lock` | Lockfile preventing concurrent orchestrate runs (30-min stale threshold) | -| `selftune sync` | Rebuilds trustworthy local evidence from source systems | -| `cli/selftune/localdb/` | Materializes logs into SQLite tables and payload-oriented queries | -| `cli/selftune/dashboard-server.ts` | Serves the SPA and the v2 dashboard API | -| `packages/ui/` | Shared UI components, primitives, and types for dashboard SPAs | -| `apps/local-dashboard/` | Overview, per-skill report, system status/diagnostics UI | +| Artifact | Role | +| ------------------------------------- | ----------------------------------------------------------------------------------- | +| `~/.claude/*.jsonl` | Shared append-only logs for telemetry, queries, repaired usage, and evolution audit | +| `~/.claude/orchestrate_runs.jsonl` | Persisted orchestrate run reports for CLI and dashboard inspection | +| `~/.claude/improvement_signals.jsonl` | Real-time improvement signals from user corrections and explicit skill requests | +| `~/.claude/.orchestrate.lock` | Lockfile preventing concurrent orchestrate runs (30-min stale threshold) | +| `selftune sync` | Rebuilds trustworthy local evidence from source systems | +| `cli/selftune/localdb/` | Materializes logs into SQLite tables and payload-oriented queries | +| `cli/selftune/dashboard-server.ts` | Serves the SPA and the v2 dashboard API | +| `packages/ui/` | Shared UI components, primitives, and types for dashboard SPAs | +| `apps/local-dashboard/` | Overview, 
per-skill report, system status/diagnostics UI | ## What selftune Is Not diff --git a/docs/design-docs/workflow-support.md b/docs/design-docs/workflow-support.md index c6944bfe..9af012cc 100644 --- a/docs/design-docs/workflow-support.md +++ b/docs/design-docs/workflow-support.md @@ -12,12 +12,14 @@ selftune monitors individual skills. But users don't use skills individually — they chain them. A user asks "write and publish a blog post" and the agent invokes copywriting, marketing, SEO, and blog publishing in sequence. selftune has zero visibility into whether that multi-skill chain worked. Real example from a March 8 session: + 1. MarketingAutomation/Copywriting → drafted blog post with SEO principles 2. Source analysis doc → read competitive analysis for content 3. Content composition → assembled final markdown 4. SelfTuneBlog → published to selftune.dev The agent orchestrated 4 skills manually. selftune saw 4 individual skill invocations. It couldn't tell: + - Whether the handoff between skills was clean - Whether the overall workflow succeeded - Whether a different skill order would have been better @@ -116,17 +118,20 @@ Add an optional `## Workflows` section to SKILL.md: ## Workflows ### Blog Publishing + - **Skills:** Copywriting → MarketingAutomation → SelfTuneBlog - **Trigger:** User asks to write and publish a blog post - **Source:** Discovered from 12 sessions (synergy: 0.72) ### Security Audit + - **Skills:** Recon → WebAssessment → Security - **Trigger:** User asks for a security assessment - **Source:** Authored ``` This section is: + - **Optional** — skills work fine without it - **Informational** — the agent reads it for context, not as a hard execution plan - **Backwards compatible** — agents that don't understand `## Workflows` simply ignore it @@ -140,6 +145,7 @@ Individual skills can reference related skills: # Copywriting ## Related Skills + - **Often followed by:** SelfTuneBlog, SocialContent - **Often preceded by:** Research, ContentAnalysis - 
**Synergy score:** 0.72 with SelfTuneBlog (12 sessions) @@ -161,13 +167,13 @@ Computed at analysis time: #### Workflow Quality Metrics -| Metric | Computation | What It Tells You | -|--------|-------------|-------------------| -| **Synergy score** | `(errors_individual - errors_workflow) / (errors_individual + 1)` | Do these skills work better together than apart? | -| **Sequence consistency** | % of occurrences with same skill order | Is the ordering stable or chaotic? | -| **Completion rate** | % of sessions where all skills in sequence fired | Does the full chain execute? | -| **Handoff quality** | Error rate at skill transitions (error in skill N+1 after skill N) | Where do handoffs break? | -| **Workflow trigger rate** | % of queries that should trigger the workflow and do | Same as individual trigger accuracy, but workflow-level | +| Metric | Computation | What It Tells You | +| ------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------- | +| **Synergy score** | `(errors_individual - errors_workflow) / (errors_individual + 1)` | Do these skills work better together than apart? | +| **Sequence consistency** | % of occurrences with same skill order | Is the ordering stable or chaotic? | +| **Completion rate** | % of sessions where all skills in sequence fired | Does the full chain execute? | +| **Handoff quality** | Error rate at skill transitions (error in skill N+1 after skill N) | Where do handoffs break? | +| **Workflow trigger rate** | % of queries that should trigger the workflow and do | Same as individual trigger accuracy, but workflow-level | ### CLI Commands @@ -226,6 +232,7 @@ that is not part of the current command surface. 
### Cross-Platform Support Workflow discovery works on any platform that produces `skill_usage_log.jsonl`: + - Claude Code: native hook support - Codex: via `selftune ingest codex` - OpenCode: via `selftune ingest opencode` @@ -235,17 +242,18 @@ No platform-specific logic needed. The analysis operates on the shared log schem ### Implementation Phases -| Phase | What Ships | Builds On | -|-------|-----------|-----------| -| **v0.3** | `selftune workflows` (discovery + display) | Composability v2 | -| **v0.3** | `selftune workflows save` (codify) | SKILL.md format extension | +| Phase | What Ships | Builds On | +| -------- | ------------------------------------------------------ | --------------------------- | +| **v0.3** | `selftune workflows` (discovery + display) | Composability v2 | +| **v0.3** | `selftune workflows save` (codify) | SKILL.md format extension | | **v0.4** | `selftune workflows evolve` (workflow-level evolution) | Existing evolution pipeline | -| **v0.4** | Handoff quality metrics | Workflow telemetry analysis | -| **v0.5** | `## Related Skills` auto-generation | Discovery data | +| **v0.4** | Handoff quality metrics | Workflow telemetry analysis | +| **v0.5** | `## Related Skills` auto-generation | Discovery data | ### Zero-Dependency Compliance All workflow analysis is: + - **Pure functions** operating on JSONL arrays (same pattern as composability.ts) - **No new log files** — computed from existing `skill_usage_log.jsonl` + `session_telemetry_log.jsonl` - **No runtime dependencies** — standard TypeScript/Bun @@ -273,6 +281,7 @@ selftune workflows save "Copywriting→SelfTuneBlog" ``` **Current shipped behavior:** + - Discover repeated ordered skill chains from telemetry - Show synergy, consistency, and completion metrics - Append a discovered workflow to `## Workflows` in SKILL.md diff --git a/docs/escalation-policy.md b/docs/escalation-policy.md index 25f4e2c4..94db3aed 100644 --- a/docs/escalation-policy.md +++ b/docs/escalation-policy.md @@ 
-6,11 +6,11 @@ Clear criteria for when agents proceed autonomously vs. when to involve a human. ## Risk Tiers -| Tier | Agent Authority | Human Required | -|------|----------------|----------------| -| **Low** | Proceed autonomously | Notify after completion | -| **Medium** | Propose change, await approval | Review before merge | -| **High** | Draft only, do not execute | Explicit sign-off required | +| Tier | Agent Authority | Human Required | +| ---------- | ------------------------------ | -------------------------- | +| **Low** | Proceed autonomously | Notify after completion | +| **Medium** | Propose change, await approval | Review before merge | +| **High** | Draft only, do not execute | Explicit sign-off required | ## Escalation Triggers diff --git a/docs/exec-plans/active/agent-first-alpha-onboarding.md b/docs/exec-plans/active/agent-first-alpha-onboarding.md index c05b12e1..3200a8e2 100644 --- a/docs/exec-plans/active/agent-first-alpha-onboarding.md +++ b/docs/exec-plans/active/agent-first-alpha-onboarding.md @@ -9,11 +9,13 @@ Make the real alpha user path happen through the user's coding agent and the local CLI, not through the cloud frontend as the primary UX. The cloud app remains the control plane for: + - sign-in - alpha enrollment - upload credential issuance But the user's experience should be: + 1. tell the agent to set up selftune 2. complete the minimum cloud auth handoff 3. return to the agent/CLI flow @@ -23,6 +25,7 @@ But the user's experience should be: The cloud app is a dependency, not the main product surface. The main product surface remains: + - `skill/SKILL.md` - `skill/Workflows/Initialize.md` - `selftune init` @@ -32,6 +35,7 @@ The main product surface remains: **Goal:** specify the exact setup sequence the agent should follow. 
### Deliverable + - a short flow spec covering: - user says "set up selftune" - agent checks local config @@ -41,6 +45,7 @@ The main product surface remains: - agent finishes setup and verifies upload readiness ### Acceptance + - no ambiguity about where browser handoff happens - no ambiguity about what the agent asks the user - no ambiguity about when the flow returns to local CLI mode @@ -50,11 +55,13 @@ The main product surface remains: **Goal:** stop treating alpha identity as a separate local-only user model. ### Files + - `cli/selftune/alpha-identity.ts` - `cli/selftune/types.ts` - `cli/selftune/init.ts` ### Work + - treat cloud-linked identity as authoritative - keep local config as a cache of: - cloud user id @@ -64,6 +71,7 @@ The main product surface remains: - remove assumptions that local email/user id are the real alpha identity source ### Acceptance + - local config reflects linked cloud identity, not a separate parallel identity model ## Ticket 3: Add CLI Support for Cloud Linking State @@ -71,11 +79,13 @@ The main product surface remains: **Goal:** make `selftune init` and related commands aware of cloud link status. ### Files + - `cli/selftune/init.ts` - `cli/selftune/status.ts` - `cli/selftune/observability.ts` ### Work + - detect whether cloud identity + upload credential are present - show clear agent-facing next steps when missing - expose whether alpha upload is: @@ -85,6 +95,7 @@ The main product surface remains: - ready ### Acceptance + - agent can reliably diagnose why alpha upload is not active ## Ticket 4: Add Browser Handoff UX for the Agent @@ -92,11 +103,13 @@ The main product surface remains: **Goal:** make the unavoidable cloud step feel intentional and small. 
### Files + - `skill/Workflows/Initialize.md` - `skill/SKILL.md` - `skill/references/interactive-config.md` ### Work + - tell the agent exactly when to ask the user to sign in to the cloud app - tell the agent exactly when to ask the user to issue an upload credential - make the copy explicit: @@ -104,6 +117,7 @@ The main product surface remains: - afterwards the workflow returns to the local agent/CLI path ### Acceptance + - the agent does not present the cloud app as the main way to use selftune ## Ticket 5: Add Credential Import / Storage Path @@ -111,16 +125,19 @@ The main product surface remains: **Goal:** let the agent finish setup after the user gets a cloud-issued credential. ### Files + - `cli/selftune/init.ts` - `cli/selftune/alpha-upload/index.ts` - local config read/write helpers ### Work + - accept product-issued `st_live_*` credential in setup flow - store it locally in the expected config location - validate presence/format before marking setup complete ### Acceptance + - after credential issuance, the agent can finish setup without manual file editing ## Ticket 6: Add Upload Readiness Verification @@ -128,11 +145,13 @@ The main product surface remains: **Goal:** prove the local machine is actually ready after setup. ### Files + - `cli/selftune/init.ts` - `cli/selftune/observability.ts` - `skill/Workflows/Initialize.md` ### Work + - run a small readiness check after setup: - config present - enrollment/credential fields present @@ -141,6 +160,7 @@ The main product surface remains: - return agent-facing confirmation or exact remediation ### Acceptance + - setup ends with a concrete readiness result, not “probably done” ## Ticket 7: Update Agent Docs to Match the New Truth @@ -148,17 +168,20 @@ The main product surface remains: **Goal:** keep the agent-first product surface aligned with the new onboarding path. 
 ### Files
+
 - `skill/SKILL.md`
 - `skill/Workflows/Initialize.md`
 - `skill/Workflows/Doctor.md`
 - `skill/Workflows/Dashboard.md` if any cloud references exist
 
 ### Work
+
 - make the setup workflow explicitly agent-first
 - describe cloud auth as a required one-time control-plane handoff
 - remove any implication that users should live in the cloud UI for normal use
 
 ### Acceptance
+
 - docs match the intended product story
 
 ## Ticket 8: Add End-to-End Setup Smoke Test
@@ -166,12 +189,14 @@ The main product surface remains:
 **Goal:** verify the intended user path, not just the pieces.
 
 ### Scope
+
 - temp local config
 - simulated or staged cloud-issued credential
 - `selftune init`
 - readiness verification
 
 ### Acceptance
+
 - one passing test proves the setup can go from fresh machine to upload-ready
 
 ## Recommended Order
diff --git a/docs/exec-plans/active/alpha-rollout-data-loop-plan.md b/docs/exec-plans/active/alpha-rollout-data-loop-plan.md
index fbb44dc0..13a7edb1 100644
--- a/docs/exec-plans/active/alpha-rollout-data-loop-plan.md
+++ b/docs/exec-plans/active/alpha-rollout-data-loop-plan.md
@@ -330,18 +330,21 @@ This work still matters, but it should follow the data loop, not precede it.
 ### Phase C (completed 2026-03-18)
 
 Wave 1 (parallel):
+
 1. **Agent 1:** Queue + watermark storage (20 tests)
 2. **Agent 2:** Payload builder from SQLite (19 tests)
 3. **Agent 3:** HTTP client + flush engine (15 tests)
 4. **Agent 4:** Cloud API integration (replaced standalone Worker scaffold) (17 tests)
 
 Wave 2 (after Wave 1):
+
 5. **Agent 5:** CLI + orchestrate integration (10 tests)
 6. **Agent 6:** Upload status + doctor diagnostics (17 tests)
 
 ### Next split suggestion
 
 Phase D is the next active target:
+
 1. **Agent 1:** Four-quadrant analysis view (TP/FP/FN/TN)
 2. **Agent 2:** Labeling + review mechanism
 3. 
**Agent 3:** Operator inspection flow (Daniel-only) diff --git a/docs/exec-plans/completed/agent-first-skill-restructure.md b/docs/exec-plans/completed/agent-first-skill-restructure.md index f3138c9b..e2e1b3ac 100644 --- a/docs/exec-plans/completed/agent-first-skill-restructure.md +++ b/docs/exec-plans/completed/agent-first-skill-restructure.md @@ -88,11 +88,11 @@ New command: `selftune init [--agent ] [--cli-path ]` **Auto-detection signals:** -| Agent | Env/Filesystem Signals | -|-------|----------------------| -| Claude Code | `~/.claude/` exists, `which claude` succeeds | -| Codex | `$CODEX_HOME` set, `which codex` succeeds | -| OpenCode | `~/.local/share/opencode/opencode.db` exists, `which opencode` succeeds | +| Agent | Env/Filesystem Signals | +| ----------- | ----------------------------------------------------------------------- | +| Claude Code | `~/.claude/` exists, `which claude` succeeds | +| Codex | `$CODEX_HOME` set, `which codex` succeeds | +| OpenCode | `~/.local/share/opencode/opencode.db` exists, `which opencode` succeeds | **Init workflow:** @@ -112,52 +112,52 @@ New command: `selftune init [--agent ] [--cli-path ]` Decompose the monolithic SKILL.md into the target directory structure. 
-| Step | Description | Depends On | -|------|-------------|------------| -| A1 | Extract grading methodology from SKILL.md → `references/grading-methodology.md` | — | -| A2 | Extract invocation taxonomy from SKILL.md → `references/invocation-taxonomy.md` | — | -| A3 | Create `Workflows/Grade.md` from grade section of SKILL.md | A1 | -| A4 | Create `Workflows/Evals.md` from evals section | A2 | -| A5 | Create `Workflows/Evolve.md` from evolve section | — | -| A6 | Create `Workflows/Rollback.md` from rollback section | — | -| A7 | Create `Workflows/Watch.md` from watch section | — | -| A8 | Create `Workflows/Doctor.md` from doctor section | — | -| A9 | Create `Workflows/Ingest.md` combining ingest codex + ingest opencode + ingest wrap-codex | — | -| A10 | Create `Workflows/Initialize.md` (references Track B output format) | B1 | -| A11 | Rewrite SKILL.md as slim routing table | A3-A10 | +| Step | Description | Depends On | +| ---- | ----------------------------------------------------------------------------------------- | ---------- | +| A1 | Extract grading methodology from SKILL.md → `references/grading-methodology.md` | — | +| A2 | Extract invocation taxonomy from SKILL.md → `references/invocation-taxonomy.md` | — | +| A3 | Create `Workflows/Grade.md` from grade section of SKILL.md | A1 | +| A4 | Create `Workflows/Evals.md` from evals section | A2 | +| A5 | Create `Workflows/Evolve.md` from evolve section | — | +| A6 | Create `Workflows/Rollback.md` from rollback section | — | +| A7 | Create `Workflows/Watch.md` from watch section | — | +| A8 | Create `Workflows/Doctor.md` from doctor section | — | +| A9 | Create `Workflows/Ingest.md` combining ingest codex + ingest opencode + ingest wrap-codex | — | +| A10 | Create `Workflows/Initialize.md` (references Track B output format) | B1 | +| A11 | Rewrite SKILL.md as slim routing table | A3-A10 | ### Track B: CLI `init` Command Build the bootstrap command. 
-| Step | Description | Depends On | -|------|-------------|------------| -| B1 | Define config schema in `cli/selftune/types.ts` | — | -| B2 | Create `cli/selftune/init.ts` with agent detection + config write | B1 | -| B3 | Wire `init` into `cli/selftune/index.ts` router | B2 | -| B4 | Write tests for init command | B2 | -| B5 | Update `doctor` to check for config file existence | B2 | +| Step | Description | Depends On | +| ---- | ----------------------------------------------------------------- | ---------- | +| B1 | Define config schema in `cli/selftune/types.ts` | — | +| B2 | Create `cli/selftune/init.ts` with agent detection + config write | B1 | +| B3 | Wire `init` into `cli/selftune/index.ts` router | B2 | +| B4 | Write tests for init command | B2 | +| B5 | Update `doctor` to check for config file existence | B2 | ### Integration -| Step | Description | Depends On | -|------|-------------|------------| -| C1 | Each workflow references config for CLI path resolution | A3-A10, B2 | -| C2 | Update README.md with new quick-start flow | A11, B3 | -| C3 | Run full test suite (`bun test`) to verify nothing broken | C1, C2 | +| Step | Description | Depends On | +| ---- | --------------------------------------------------------- | ---------- | +| C1 | Each workflow references config for CLI path resolution | A3-A10, B2 | +| C2 | Update README.md with new quick-start flow | A11, B3 | +| C3 | Run full test suite (`bun test`) to verify nothing broken | C1, C2 | --- ## Design Decisions -| Decision | Choice | Rationale | -|----------|--------|-----------| -| Config location | `~/.selftune/config.json` | Agent-agnostic, outside any single project | -| Agent detection | Auto-detect + confirm | Avoid false positives (user might have multiple agents) | -| Ingest workflows | Combined into one file | Same concept: "bring external sessions into shared schema" | -| Workflow file per command | Yes, 1:1 mapping | Matches Reins pattern, keeps each file focused | -| References 
extracted | grading-methodology + invocation-taxonomy | These are conceptual knowledge, not command workflows | -| CLI path in config | Absolute path at init time | No runtime discovery needed, survives directory changes | +| Decision | Choice | Rationale | +| ------------------------- | ----------------------------------------- | ---------------------------------------------------------- | +| Config location | `~/.selftune/config.json` | Agent-agnostic, outside any single project | +| Agent detection | Auto-detect + confirm | Avoid false positives (user might have multiple agents) | +| Ingest workflows | Combined into one file | Same concept: "bring external sessions into shared schema" | +| Workflow file per command | Yes, 1:1 mapping | Matches Reins pattern, keeps each file focused | +| References extracted | grading-methodology + invocation-taxonomy | These are conceptual knowledge, not command workflows | +| CLI path in config | Absolute path at init time | No runtime discovery needed, survives directory changes | --- diff --git a/docs/exec-plans/completed/dashboard-data-integrity-recovery.md b/docs/exec-plans/completed/dashboard-data-integrity-recovery.md index 6cb7d533..a605abda 100644 --- a/docs/exec-plans/completed/dashboard-data-integrity-recovery.md +++ b/docs/exec-plans/completed/dashboard-data-integrity-recovery.md @@ -11,6 +11,7 @@ This recovery plan has partially executed. **Landed already:** + - runtime identity now exposes repo-root `workspace_root`, git SHA, DB/log/config paths, watcher mode, and process mode - the dashboard UI now shows a runtime footer - the dashboard footer and Status page now warn explicitly when live invalidation is still in legacy JSONL watcher mode @@ -21,6 +22,7 @@ This recovery plan has partially executed. 
- doctor now includes an integrity warning about the current JSONL-backed dashboard freshness contract **Still open from this plan:** + - backup symmetry for `evolution_audit`, `evolution_evidence`, and `orchestrate_runs` - WAL-driven SSE freshness instead of JSONL watcher invalidation - clearer overview timeline semantics diff --git a/docs/exec-plans/completed/dashboard-signal-integration.md b/docs/exec-plans/completed/dashboard-signal-integration.md index ba8be95f..54590ee3 100644 --- a/docs/exec-plans/completed/dashboard-signal-integration.md +++ b/docs/exec-plans/completed/dashboard-signal-integration.md @@ -38,15 +38,16 @@ This plan adds signal data to the full dashboard pipeline: schema → materializ **Files:** -| File | Change | -|------|--------| -| `cli/selftune/localdb/schema.ts` | Add `improvement_signals` table | -| `cli/selftune/localdb/materialize.ts` | Read `SIGNAL_LOG`, insert into signals table | -| `cli/selftune/localdb/queries.ts` | Add signal count/history queries | -| `cli/selftune/dashboard-contract.ts` | Add signal fields to `OverviewPayload`, `SkillReportResponse`, `OrchestrateRunSkillAction` | -| `cli/selftune/dashboard-server.ts` | Query and expose signal data in existing endpoints | +| File | Change | +| ------------------------------------- | ------------------------------------------------------------------------------------------ | +| `cli/selftune/localdb/schema.ts` | Add `improvement_signals` table | +| `cli/selftune/localdb/materialize.ts` | Read `SIGNAL_LOG`, insert into signals table | +| `cli/selftune/localdb/queries.ts` | Add signal count/history queries | +| `cli/selftune/dashboard-contract.ts` | Add signal fields to `OverviewPayload`, `SkillReportResponse`, `OrchestrateRunSkillAction` | +| `cli/selftune/dashboard-server.ts` | Query and expose signal data in existing endpoints | **Schema:** + ```sql CREATE TABLE IF NOT EXISTS improvement_signals ( id INTEGER PRIMARY KEY AUTOINCREMENT, @@ -62,6 +63,7 @@ CREATE TABLE IF NOT EXISTS 
improvement_signals ( ``` **Contract additions:** + ```typescript // OverviewPayload pending_signals?: number; @@ -79,6 +81,7 @@ signal_boost?: number; **File:** `cli/selftune/orchestrate.ts` Add to `OrchestrateRunSkillAction`: + - `signal_count` — number of signals that boosted this skill - `signal_boost` — total priority boost from signals @@ -88,12 +91,12 @@ These are already computed during candidate selection but not persisted. **Files:** -| File | Change | -|------|--------| -| `apps/local-dashboard/src/types.ts` | Import new signal fields | -| `apps/local-dashboard/src/pages/Overview.tsx` | Show pending signal count badge | -| `apps/local-dashboard/src/pages/SkillReport.tsx` | Show signal history timeline | -| `apps/local-dashboard/src/components/OrchestrateRunsPanel.tsx` | Show signal boost per skill | +| File | Change | +| -------------------------------------------------------------- | ------------------------------- | +| `apps/local-dashboard/src/types.ts` | Import new signal fields | +| `apps/local-dashboard/src/pages/Overview.tsx` | Show pending signal count badge | +| `apps/local-dashboard/src/pages/SkillReport.tsx` | Show signal history timeline | +| `apps/local-dashboard/src/components/OrchestrateRunsPanel.tsx` | Show signal boost per skill | --- diff --git a/docs/exec-plans/completed/output-quality-loop-prereqs.md b/docs/exec-plans/completed/output-quality-loop-prereqs.md index 8c113e4e..1f9e3c47 100644 --- a/docs/exec-plans/completed/output-quality-loop-prereqs.md +++ b/docs/exec-plans/completed/output-quality-loop-prereqs.md @@ -18,6 +18,7 @@ This is a prerequisite plan, not a feature plan. The output-quality PRD is strategically right but tactically early. Current priority remains: + - trusted alpha onboarding - reliable local-to-cloud upload - operator visibility @@ -46,17 +47,20 @@ For sessions where a skill fires, preserve the best available representation of what the agent actually produced. 
Prefer, in order: + - final assistant message text - generated file references and changed file paths - structured artifact metadata when available - attachment or screenshot references when available Minimum requirement: + - enough data to let an operator answer “the skill fired, but what did it produce?” ### 2. Output Context Linkage Every captured output signal should be linkable back to: + - `session_id` - `prompt_id` if available - `skill_invocation_id` @@ -71,6 +75,7 @@ This is what makes later grading and mutation evidence usable. For output-quality work, text alone is often insufficient. Capture references to: + - changed files - generated markdown/docs/code outputs - image or screenshot paths when local artifacts exist @@ -84,6 +89,7 @@ Store references and metadata first. Add a lightweight operator review path for “triggered correctly, output looked bad.” Minimum viable form: + - mark a session or invocation as output-bad - attach a short note - preserve the linked output evidence @@ -93,6 +99,7 @@ This gives real labels before full automation exists. ### 5. Cloud Queryability The cloud side should be able to answer: + - which skills trigger often but receive poor output feedback - which invocations are linked to output-bad labels - what artifacts or outputs were produced for those invocations @@ -104,6 +111,7 @@ This can start as operator-facing inspection, not polished UI. ### Slice A: Local Evidence Capture In `miami`, ensure the local telemetry pipeline preserves: + - final response text when safely available - changed file paths - artifact metadata or attachment references @@ -114,6 +122,7 @@ Prefer capture over elegance. ### Slice B: Canonical Upload Contract Extension Extend the alpha upload contract only where needed to preserve: + - output evidence references - linked file paths or artifact metadata - future operator labels for output quality @@ -124,6 +133,7 @@ Add only fields that are clearly useful for later grading or review. 
### Slice C: Cloud Operator Inspection In the cloud app, ensure operator surfaces can inspect: + - invocation - output evidence - linked artifacts @@ -134,6 +144,7 @@ Start with raw/operator views, not polished product UI. ### Slice D: Manual Label Seed Add a minimal label model for: + - `output_bad` - `output_good` - optional note @@ -161,6 +172,7 @@ This is enough to seed the later quality loop. Use the output-quality PRD to influence **what data we keep now**. Do **not** treat it as the next implementation milestone until: + - alpha users are active - the current trigger/data loop is trusted - operator review of real outputs is happening diff --git a/docs/exec-plans/completed/telemetry-normalization.md b/docs/exec-plans/completed/telemetry-normalization.md index 61b9c567..4d3d3f4c 100644 --- a/docs/exec-plans/completed/telemetry-normalization.md +++ b/docs/exec-plans/completed/telemetry-normalization.md @@ -367,58 +367,58 @@ The cloud product should consume the same canonical model as local, not a second ### Track 0: Platform Contract Verification -| Step | Description | Depends On | -|------|-------------|------------| -| 0A | Inventory official docs for Claude Code, Codex, OpenCode, and OpenClaw fields relevant to sessions, prompts, invocations, hooks, stats, and exports | — | -| 0B | Capture fresh local sessions where available, and fall back to checked-in fixtures when a platform is not installed on this machine | 0A | -| 0C | Record drift cases where docs and observed payloads differ, and decide whether canonical fields are `required`, `optional`, or `derived` | 0B | -| 0D | Freeze a source-to-canonical field mapping table before adapter refactors begin | 0C | +| Step | Description | Depends On | +| ---- | --------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- | +| 0A | Inventory official docs for Claude Code, Codex, OpenCode, and OpenClaw fields 
relevant to sessions, prompts, invocations, hooks, stats, and exports | — | +| 0B | Capture fresh local sessions where available, and fall back to checked-in fixtures when a platform is not installed on this machine | 0A | +| 0C | Record drift cases where docs and observed payloads differ, and decide whether canonical fields are `required`, `optional`, or `derived` | 0B | +| 0D | Freeze a source-to-canonical field mapping table before adapter refactors begin | 0C | ### Track A: Schema and Domain Model -| Step | Description | Depends On | -|------|-------------|------------| -| A1 | Define canonical event entities in `types.ts` without breaking existing log readers | — | -| A2 | Add explicit enums for `platform`, `capture_mode`, `prompt_kind`, `invocation_mode` | A1 | -| A3 | Add `schema_version` + `normalizer_version` to canonical records | A1 | -| A4 | Write design doc for canonical event model and local/cloud projection rules | A1 | +| Step | Description | Depends On | +| ---- | ----------------------------------------------------------------------------------- | ---------- | +| A1 | Define canonical event entities in `types.ts` without breaking existing log readers | — | +| A2 | Add explicit enums for `platform`, `capture_mode`, `prompt_kind`, `invocation_mode` | A1 | +| A3 | Add `schema_version` + `normalizer_version` to canonical records | A1 | +| A4 | Write design doc for canonical event model and local/cloud projection rules | A1 | ### Track B: Local Normalizer -| Step | Description | Depends On | -|------|-------------|------------| -| B1 | Add a normalization module that converts raw adapter data into canonical events | A2 | -| B2 | Move actionable/meta classification into normalization, not downstream dashboards | B1 | -| B3 | Emit normalized local projections from canonical events | B1 | -| B4 | Fold repaired overlays into the same normalization pipeline | B1 | +| Step | Description | Depends On | +| ---- | 
--------------------------------------------------------------------------------- | ---------- | +| B1 | Add a normalization module that converts raw adapter data into canonical events | A2 | +| B2 | Move actionable/meta classification into normalization, not downstream dashboards | B1 | +| B3 | Emit normalized local projections from canonical events | B1 | +| B4 | Fold repaired overlays into the same normalization pipeline | B1 | ### Track C: Adapter Upgrades -| Step | Description | Depends On | -|------|-------------|------------| -| C1 | Upgrade Claude Code hook/replay writers to populate canonical source dimensions | B1 | -| C2 | Upgrade Codex wrapper/rollout ingest to canonical source dimensions | B1 | -| C3 | Upgrade OpenCode ingest to canonical source dimensions | B1 | -| C4 | Upgrade OpenClaw ingest to canonical source dimensions | B1 | -| C5 | Add adapter fixture coverage for normalization output parity | C1, C2, C3, C4 | +| Step | Description | Depends On | +| ---- | ------------------------------------------------------------------------------- | -------------- | +| C1 | Upgrade Claude Code hook/replay writers to populate canonical source dimensions | B1 | +| C2 | Upgrade Codex wrapper/rollout ingest to canonical source dimensions | B1 | +| C3 | Upgrade OpenCode ingest to canonical source dimensions | B1 | +| C4 | Upgrade OpenClaw ingest to canonical source dimensions | B1 | +| C5 | Add adapter fixture coverage for normalization output parity | C1, C2, C3, C4 | ### Track D: Cloud Projection -| Step | Description | Depends On | -|------|-------------|------------| -| D1 | Define Neon schema for sessions, prompts, invocations, telemetry facts | A4 | -| D2 | Add canonical-to-Neon projection layer | D1, B3 | -| D3 | Ensure local and cloud dashboards consume the same canonical view model | D2 | -| D4 | Add replay-safe idempotency keys for cloud upserts | D2 | +| Step | Description | Depends On | +| ---- | 
----------------------------------------------------------------------- | ---------- | +| D1 | Define Neon schema for sessions, prompts, invocations, telemetry facts | A4 | +| D2 | Add canonical-to-Neon projection layer | D1, B3 | +| D3 | Ensure local and cloud dashboards consume the same canonical view model | D2 | +| D4 | Add replay-safe idempotency keys for cloud upserts | D2 | ### Track E: Analytics and Evidence -| Step | Description | Depends On | -|------|-------------|------------| -| E1 | Replace free-form `source` heuristics in analytics with canonical fields | B3 | -| E2 | Add quality dashboards for `meta_rate`, `repair_rate`, `explicit_invocation_rate` | B3 | -| E3 | Track invocation confidence and prompt/invocation joins in reports | E1 | -| E4 | Add cloud-facing evidence exports from canonical records | D2, E3 | +| Step | Description | Depends On | +| ---- | --------------------------------------------------------------------------------- | ---------- | +| E1 | Replace free-form `source` heuristics in analytics with canonical fields | B3 | +| E2 | Add quality dashboards for `meta_rate`, `repair_rate`, `explicit_invocation_rate` | B3 | +| E3 | Track invocation confidence and prompt/invocation joins in reports | E1 | +| E4 | Add cloud-facing evidence exports from canonical records | D2, E3 | --- diff --git a/docs/exec-plans/deferred/advanced-skill-patterns-adoption.md b/docs/exec-plans/deferred/advanced-skill-patterns-adoption.md index 240eb480..28e7fdee 100644 --- a/docs/exec-plans/deferred/advanced-skill-patterns-adoption.md +++ b/docs/exec-plans/deferred/advanced-skill-patterns-adoption.md @@ -105,8 +105,8 @@ If the design is sound, implement the split for a small set of high-value helper **Files:** -| File | Change | -|------|--------| +| File | Change | +| ---------------- | ---------------------------------- | | `skill/SKILL.md` | Add `argument-hint` to frontmatter | **Recommended value:** @@ -121,13 +121,13 @@ This improves direct `/selftune ...` 
invocation UX while preserving auto-routing **Files:** -| File | Change | -|------|--------| -| `skill/examples/doctor-output.md` | New example of doctor output interpretation | -| `skill/examples/evolve-summary.md` | New example of evolve dry-run summary | +| File | Change | +| --------------------------------------- | ------------------------------------------------ | +| `skill/examples/doctor-output.md` | New example of doctor output interpretation | +| `skill/examples/evolve-summary.md` | New example of evolve dry-run summary | | `skill/examples/orchestrate-summary.md` | New example of orchestrate result interpretation | -| `skill/SKILL.md` | Add examples to resource index | -| Relevant `Workflows/*.md` | Reference examples where useful | +| `skill/SKILL.md` | Add examples to resource index | +| Relevant `Workflows/*.md` | Reference examples where useful | **Rationale:** @@ -137,11 +137,11 @@ The Claude Code docs recommend supporting files for detailed examples instead of **Files:** -| File | Change | -|------|--------| -| `skill/SKILL.md` | Update any skill-local path guidance to prefer skill-dir-relative references | -| `skill/Workflows/Initialize.md` | Use `${CLAUDE_SKILL_DIR}` when referencing bundled setup files in command/snippet examples | -| `skill/references/setup-patterns.md` | Use `${CLAUDE_SKILL_DIR}` in examples that point to bundled assets | +| File | Change | +| ------------------------------------ | ------------------------------------------------------------------------------------------ | +| `skill/SKILL.md` | Update any skill-local path guidance to prefer skill-dir-relative references | +| `skill/Workflows/Initialize.md` | Use `${CLAUDE_SKILL_DIR}` when referencing bundled setup files in command/snippet examples | +| `skill/references/setup-patterns.md` | Use `${CLAUDE_SKILL_DIR}` in examples that point to bundled assets | **Rule:** @@ -200,12 +200,12 @@ No code changes are required to complete this phase. 
### Likely rollout pattern -| Helper role | Recommended controls | -|-------------|----------------------| -| Diagnosis | `context: fork`, `agent`, `user-invocable: false` | -| Evolution review | `context: fork`, `agent`, `user-invocable: false` | -| Integration guide | `context: fork`, `agent`, maybe user-invocable if exposed intentionally | -| Destructive/manual workflows if split out | `disable-model-invocation: true` | +| Helper role | Recommended controls | +| ----------------------------------------- | ----------------------------------------------------------------------- | +| Diagnosis | `context: fork`, `agent`, `user-invocable: false` | +| Evolution review | `context: fork`, `agent`, `user-invocable: false` | +| Integration guide | `context: fork`, `agent`, maybe user-invocable if exposed intentionally | +| Destructive/manual workflows if split out | `disable-model-invocation: true` | ### Explicit anti-patterns diff --git a/docs/exec-plans/deferred/multi-agent-sandbox.md b/docs/exec-plans/deferred/multi-agent-sandbox.md index 6f03b966..770de260 100644 --- a/docs/exec-plans/deferred/multi-agent-sandbox.md +++ b/docs/exec-plans/deferred/multi-agent-sandbox.md @@ -69,76 +69,76 @@ sandbox-shell-codex: Split the generic design doc into shared architecture + Claude Code-specific docs. 
-| Step | Description | Depends On | -|------|-------------|------------| -| A1 | Create `sandbox-architecture.md` from shared sections of `sandbox-test-harness.md` | — | -| A2 | Rename `sandbox-test-harness.md` → `sandbox-claude-code.md`, trim shared content | A1 | -| A3 | Update `docs/design-docs/index.md` with new file names | A2 | +| Step | Description | Depends On | +| ---- | ---------------------------------------------------------------------------------- | ---------- | +| A1 | Create `sandbox-architecture.md` from shared sections of `sandbox-test-harness.md` | — | +| A2 | Rename `sandbox-test-harness.md` → `sandbox-claude-code.md`, trim shared content | A1 | +| A3 | Update `docs/design-docs/index.md` with new file names | A2 | ### Track B: Fixture Restructure Reorganize fixtures into shared + per-agent directories. -| Step | Description | Depends On | -|------|-------------|------------| -| B1 | Create `fixtures/shared/` with agent-agnostic JSONL logs and skill definitions | — | -| B2 | Create `fixtures/claude-code/` with Claude Code-specific config, transcripts, hook payloads, settings | B1 | -| B3 | Create `fixtures/codex/` with `selftune-config.json` + rollout JSONL files | B1 | -| B4 | Create `fixtures/opencode/` with `selftune-config.json` + SQLite db | B1 | -| B5 | Update `run-sandbox.ts` to read from new fixture paths | B2 | -| B6 | Update `provision-claude.sh` for new fixture paths | B2 | -| B7 | Add `--agent` flag to `run-sandbox.ts` for agent-specific test selection | B5 | +| Step | Description | Depends On | +| ---- | ----------------------------------------------------------------------------------------------------- | ---------- | +| B1 | Create `fixtures/shared/` with agent-agnostic JSONL logs and skill definitions | — | +| B2 | Create `fixtures/claude-code/` with Claude Code-specific config, transcripts, hook payloads, settings | B1 | +| B3 | Create `fixtures/codex/` with `selftune-config.json` + rollout JSONL files | B1 | +| B4 | Create 
`fixtures/opencode/` with `selftune-config.json` + SQLite db | B1 | +| B5 | Update `run-sandbox.ts` to read from new fixture paths | B2 | +| B6 | Update `provision-claude.sh` for new fixture paths | B2 | +| B7 | Add `--agent` flag to `run-sandbox.ts` for agent-specific test selection | B5 | ### Track C: Layer 1 Agent Coverage Add per-agent tests to the local sandbox. -| Step | Description | Depends On | -|------|-------------|------------| -| C1 | Add Codex ingestor test (`ingest codex --dry-run`) | B3 | -| C2 | Add OpenCode ingestor test (`ingest opencode --dry-run`) | B4 | -| C3 | Make hook tests conditional on `agent_type === "claude_code"` | B5 | -| C4 | Add Makefile targets: `sandbox-codex`, `sandbox-opencode`, `sandbox-all` | C1, C2 | +| Step | Description | Depends On | +| ---- | ------------------------------------------------------------------------ | ---------- | +| C1 | Add Codex ingestor test (`ingest codex --dry-run`) | B3 | +| C2 | Add OpenCode ingestor test (`ingest opencode --dry-run`) | B4 | +| C3 | Make hook tests conditional on `agent_type === "claude_code"` | B5 | +| C4 | Add Makefile targets: `sandbox-codex`, `sandbox-opencode`, `sandbox-all` | C1, C2 | ### Track D: Layer 2 Docker Expansion Create per-agent Docker containers for LLM testing. 
-| Step | Description | Depends On | -|------|-------------|------------| -| D1 | Move `tests/sandbox/docker/` → `tests/sandbox/claude-code/` | — | -| D2 | Update Makefile targets to reference new `claude-code/` path | D1 | -| D3 | Create `tests/sandbox/codex/Dockerfile` based on Claude Code pattern | D1 | -| D4 | Create `tests/sandbox/codex/docker-compose.yml` | D3 | -| D5 | Create `tests/sandbox/codex/provision.sh` for Codex fixture setup | D3, B3 | -| D6 | Create `tests/sandbox/codex/run-with-llm.ts` for Codex LLM tests | D4 | -| D7 | Create `tests/sandbox/opencode/Dockerfile` based on Claude Code pattern | D1 | -| D8 | Create `tests/sandbox/opencode/docker-compose.yml` | D7 | -| D9 | Create `tests/sandbox/opencode/provision.sh` for OpenCode fixture setup | D7, B4 | -| D10 | Create `tests/sandbox/opencode/run-with-llm.ts` for OpenCode LLM tests | D8 | -| D11 | Add Makefile targets: `sandbox-llm-codex`, `sandbox-llm-opencode`, `sandbox-shell-codex` | D6, D10 | +| Step | Description | Depends On | +| ---- | ---------------------------------------------------------------------------------------- | ---------- | +| D1 | Move `tests/sandbox/docker/` → `tests/sandbox/claude-code/` | — | +| D2 | Update Makefile targets to reference new `claude-code/` path | D1 | +| D3 | Create `tests/sandbox/codex/Dockerfile` based on Claude Code pattern | D1 | +| D4 | Create `tests/sandbox/codex/docker-compose.yml` | D3 | +| D5 | Create `tests/sandbox/codex/provision.sh` for Codex fixture setup | D3, B3 | +| D6 | Create `tests/sandbox/codex/run-with-llm.ts` for Codex LLM tests | D4 | +| D7 | Create `tests/sandbox/opencode/Dockerfile` based on Claude Code pattern | D1 | +| D8 | Create `tests/sandbox/opencode/docker-compose.yml` | D7 | +| D9 | Create `tests/sandbox/opencode/provision.sh` for OpenCode fixture setup | D7, B4 | +| D10 | Create `tests/sandbox/opencode/run-with-llm.ts` for OpenCode LLM tests | D8 | +| D11 | Add Makefile targets: `sandbox-llm-codex`, `sandbox-llm-opencode`, 
`sandbox-shell-codex` | D6, D10 | ### Track E: Per-Agent Design Docs Document each agent's sandbox after implementation. -| Step | Description | Depends On | -|------|-------------|------------| -| E1 | Write `docs/design-docs/sandbox-codex.md` after Codex sandbox is built | D6, C1 | -| E2 | Write `docs/design-docs/sandbox-opencode.md` after OpenCode sandbox is built | D10, C2 | -| E3 | Update `ARCHITECTURE.md` sandbox section with multi-agent structure | E1, E2 | +| Step | Description | Depends On | +| ---- | ---------------------------------------------------------------------------- | ---------- | +| E1 | Write `docs/design-docs/sandbox-codex.md` after Codex sandbox is built | D6, C1 | +| E2 | Write `docs/design-docs/sandbox-opencode.md` after OpenCode sandbox is built | D10, C2 | +| E3 | Update `ARCHITECTURE.md` sandbox section with multi-agent structure | E1, E2 | --- ## Design Decisions -| Decision | Choice | Rationale | -|----------|--------|-----------| -| Fixture organization | `shared/` + per-agent overlays | Agent-agnostic data (JSONL logs, skills) shared; agent-specific data isolated | -| Docker per agent | Separate Dockerfiles per agent | Each agent has different runtime requirements and CLI tools | -| Backward compatibility | `sandbox` and `sandbox-llm` default to Claude Code | Existing workflows unbroken | -| `--agent` flag | Added to `run-sandbox.ts` | Single entry point with agent selection vs. 
separate scripts | -| Track order | A → B → C → D → E | Docs first (zero risk), then fixtures, then tests, then Docker, then final docs | +| Decision | Choice | Rationale | +| ---------------------- | -------------------------------------------------- | ------------------------------------------------------------------------------- | +| Fixture organization | `shared/` + per-agent overlays | Agent-agnostic data (JSONL logs, skills) shared; agent-specific data isolated | +| Docker per agent | Separate Dockerfiles per agent | Each agent has different runtime requirements and CLI tools | +| Backward compatibility | `sandbox` and `sandbox-llm` default to Claude Code | Existing workflows unbroken | +| `--agent` flag | Added to `run-sandbox.ts` | Single entry point with agent selection vs. separate scripts | +| Track order | A → B → C → D → E | Docs first (zero risk), then fixtures, then tests, then Docker, then final docs | --- diff --git a/docs/exec-plans/deferred/phase-d-marginal-case-review-spike.md b/docs/exec-plans/deferred/phase-d-marginal-case-review-spike.md index ee31a7fe..de8c4138 100644 --- a/docs/exec-plans/deferred/phase-d-marginal-case-review-spike.md +++ b/docs/exec-plans/deferred/phase-d-marginal-case-review-spike.md @@ -70,12 +70,12 @@ The system only needs to answer: Every reviewed case should eventually be classifiable as one of: -| Expected | Actual | Outcome | -|---|---|---| -| should trigger | triggered | true positive | -| should trigger | not triggered | false negative | -| should not trigger | triggered | false positive | -| should not trigger | not triggered | true negative | +| Expected | Actual | Outcome | +| ------------------ | ------------- | -------------- | +| should trigger | triggered | true positive | +| should trigger | not triggered | false negative | +| should not trigger | triggered | false positive | +| should not trigger | not triggered | true negative | In practice: @@ -159,18 +159,18 @@ Recommended shape: ```ts interface 
MarginalCaseReview { - review_id: string - user_id: string - session_id: string - occurred_at: string - skill_name: string | null - query_text: string - candidate_type: "likely_false_negative" | "likely_false_positive" | "marginal" - predicted_quadrant: "tp" | "fp" | "fn" | "tn" | "unknown" - reviewer_label: "tp" | "fp" | "fn" | "tn" | "unsure" - reviewer_note?: string - reviewer_id: string - reviewed_at: string + review_id: string; + user_id: string; + session_id: string; + occurred_at: string; + skill_name: string | null; + query_text: string; + candidate_type: "likely_false_negative" | "likely_false_positive" | "marginal"; + predicted_quadrant: "tp" | "fp" | "fn" | "tn" | "unknown"; + reviewer_label: "tp" | "fp" | "fn" | "tn" | "unsure"; + reviewer_note?: string; + reviewer_id: string; + reviewed_at: string; } ``` diff --git a/docs/exec-plans/reference/subagent-testing-checklist.md b/docs/exec-plans/reference/subagent-testing-checklist.md index d526c665..973dcf6c 100644 --- a/docs/exec-plans/reference/subagent-testing-checklist.md +++ b/docs/exec-plans/reference/subagent-testing-checklist.md @@ -22,6 +22,7 @@ Test through the parent selftune skill, not just by reading the markdown. 
- Pattern prompt: `which of my skills overlap` Pass criteria: + - the parent chooses the correct bundled agent - the parent provides the required inputs - the subagent returns a structured worker report diff --git a/docs/exec-plans/reference/telemetry-field-map.md b/docs/exec-plans/reference/telemetry-field-map.md index 154a2867..f1cd9e25 100644 --- a/docs/exec-plans/reference/telemetry-field-map.md +++ b/docs/exec-plans/reference/telemetry-field-map.md @@ -30,87 +30,87 @@ The extracted local contract source lives at `packages/telemetry-contract/`, wit ## Confidence and Invocation Rules -| Canonical field | Rule | -|---|---| -| `invocation_mode = explicit` | Direct skill tool invocation or source-native equivalent | -| `invocation_mode = implicit` | SKILL.md read or equivalent high-signal guidance access without explicit invocation | +| Canonical field | Rule | +| ---------------------------- | ---------------------------------------------------------------------------------------- | +| `invocation_mode = explicit` | Direct skill tool invocation or source-native equivalent | +| `invocation_mode = implicit` | SKILL.md read or equivalent high-signal guidance access without explicit invocation | | `invocation_mode = inferred` | Text/tool sequence strongly implies skill usage but no direct invocation artifact exists | -| `invocation_mode = repaired` | Reconstructed from historical transcripts after ingestion | -| `confidence = 1.0` | Explicit invocation | -| `confidence = 0.7` | Implicit invocation | -| `confidence = 0.4` | Inferred invocation | -| `confidence = 0.9` | Repaired invocation with transcript evidence | +| `invocation_mode = repaired` | Reconstructed from historical transcripts after ingestion | +| `confidence = 1.0` | Explicit invocation | +| `confidence = 0.7` | Implicit invocation | +| `confidence = 0.4` | Inferred invocation | +| `confidence = 0.9` | Repaired invocation with transcript evidence | --- ## Session and Source Fields -| Canonical field | 
Requirement | Claude Code | Codex | OpenCode | OpenClaw | Notes | -|---|---|---|---|---|---|---| -| `platform` | required | constant `claude_code` | constant `codex` | constant `opencode` | constant `openclaw` | Source identity must be first-class, not a free-form `source` string | -| `capture_mode` | required | `hook` for live hooks, `replay` for transcript backfill | `wrapper` for live `codex exec`, `batch_ingest` for rollout files | `batch_ingest` for storage/db/export ingestion | `batch_ingest` for session-file ingestion | Repairs use `repair` in overlay paths | -| `source_session_kind` | required | `interactive`, `replayed`, or `repaired` | `interactive` for wrapper, `replayed` for rollout ingest | `interactive` if exported live, `replayed` for local historical ingest | `interactive` for live sessions, `replayed` for file ingest | `synthetic` is test-only | -| `raw_source_ref` | required | hook event name and/or transcript path + line | rollout path + line or wrapper stream event | DB row/table or JSON file path | session file path + line | Needed for auditability and repair | -| `session_id` | required | hook `session_id` or transcript `sessionId` | docs `SESSION_ID`; observed `session_meta.payload.id` | DB/session JSON `id` | session header `id`; docs also distinguish `sessionKey` | Canonical `session_id` must be stable across projections | -| `external_session_id` | optional | same as `session_id` today | source-native session/thread ID if we mint a different canonical ID | source-native session ID if normalized ID changes | source-native `sessionId` if distinct from `session_key` | Avoid unless we truly need an internal surrogate ID | -| `parent_session_id` | optional | transcript `parentUuid`, subagent lineage | not yet observed | not yet observed | not yet observed | Important for subagents and branch sessions | -| `agent_id` | optional | docs `SubagentStop.agent_id` | observed `session_meta.payload.originator` if it proves to be agent identity | use 
source-native agent ID if present in exports/server API | docs may expose agent/account context | Do not guess when uncertain | -| `agent_type` | optional | docs `SubagentStop.agent_type` | not yet observed | not yet observed | source-native if present | Important for multi-agent attribution | -| `session_key` | optional | none | none | none | docs `sessionKey` | Keep separate from `session_id` | -| `channel` | optional | none | none | none | docs channel routing metadata | Needed for transport-aware analytics | -| `workspace_path` | optional | hook/transcript `cwd` | observed `session_meta.payload.cwd` or `turn_context.payload.cwd` | observed legacy session `directory` or export metadata | session header `cwd` | Normalize to absolute path when possible | -| `repo_root` | optional | derive from `cwd` when stable | derive from `cwd` | derive from `directory` | derive from `cwd` | Derived field | -| `repo_remote` | optional | derive from repo state if available | observed nested `git` payload when available | derive from repo state if directory exists | derive from repo state if available | Derived field | -| `branch` | optional | transcript `gitBranch` | observed nested `git` payload when available | derive from repo state or export metadata | derive from repo state if available | Do not make branch mandatory | -| `commit_sha` | optional | derive from repo state if available | observed nested `git` payload when available | derive from repo state if available | derive from repo state if available | Derived field | -| `permission_mode` | optional | observed transcript `permissionMode` | none | none | none | Claude-specific but useful | -| `approval_policy` | optional | none | observed `turn_context.payload.approval_policy` | use export/server metadata if present | none | Codex-specific today | -| `sandbox_policy` | optional | none | observed `turn_context.payload.sandbox_policy` | use export/server metadata if present | none | Codex-specific today | -| `provider` | 
optional | typically `anthropic` | observed `session_meta.payload.model_provider` | from stats/export/session metadata | from runtime metadata if present | Keep provider separate from model | -| `model` | optional | docs `SessionStart.model` | observed `turn_context.payload.model` | from stats/export/session metadata | from runtime metadata if present | Session-level default model | -| `started_at` | optional | hook start timestamp or first transcript event | `session_meta.timestamp` | session/export `time` or first message time | session header `timestamp` | Prefer source-native start time | -| `ended_at` | optional | `SessionEnd` time or last transcript event | last terminal event | last message/export end time | last session file line time | Derived when not explicitly emitted | -| `completion_status` | optional | derived from `SessionEnd.reason` | derived from terminal event outcome | derived from export/session status | derived from session outcome | `completed`, `failed`, `interrupted`, `cancelled`, `unknown` | -| `end_reason` | optional | docs `SessionEnd.reason` | terminal event/error payload | export/session status reason if present | hook/session reason if available | Preserve raw reason text | +| Canonical field | Requirement | Claude Code | Codex | OpenCode | OpenClaw | Notes | +| --------------------- | ----------- | ------------------------------------------------------- | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------- | ----------------------------------------------------------- | -------------------------------------------------------------------- | +| `platform` | required | constant `claude_code` | constant `codex` | constant `opencode` | constant `openclaw` | Source identity must be first-class, not a free-form `source` string | +| `capture_mode` | required | `hook` for live hooks, `replay` for transcript backfill | `wrapper` for live `codex 
exec`, `batch_ingest` for rollout files | `batch_ingest` for storage/db/export ingestion | `batch_ingest` for session-file ingestion | Repairs use `repair` in overlay paths | +| `source_session_kind` | required | `interactive`, `replayed`, or `repaired` | `interactive` for wrapper, `replayed` for rollout ingest | `interactive` if exported live, `replayed` for local historical ingest | `interactive` for live sessions, `replayed` for file ingest | `synthetic` is test-only | +| `raw_source_ref` | required | hook event name and/or transcript path + line | rollout path + line or wrapper stream event | DB row/table or JSON file path | session file path + line | Needed for auditability and repair | +| `session_id` | required | hook `session_id` or transcript `sessionId` | docs `SESSION_ID`; observed `session_meta.payload.id` | DB/session JSON `id` | session header `id`; docs also distinguish `sessionKey` | Canonical `session_id` must be stable across projections | +| `external_session_id` | optional | same as `session_id` today | source-native session/thread ID if we mint a different canonical ID | source-native session ID if normalized ID changes | source-native `sessionId` if distinct from `session_key` | Avoid unless we truly need an internal surrogate ID | +| `parent_session_id` | optional | transcript `parentUuid`, subagent lineage | not yet observed | not yet observed | not yet observed | Important for subagents and branch sessions | +| `agent_id` | optional | docs `SubagentStop.agent_id` | observed `session_meta.payload.originator` if it proves to be agent identity | use source-native agent ID if present in exports/server API | docs may expose agent/account context | Do not guess when uncertain | +| `agent_type` | optional | docs `SubagentStop.agent_type` | not yet observed | not yet observed | source-native if present | Important for multi-agent attribution | +| `session_key` | optional | none | none | none | docs `sessionKey` | Keep separate from `session_id` | 
+| `channel` | optional | none | none | none | docs channel routing metadata | Needed for transport-aware analytics | +| `workspace_path` | optional | hook/transcript `cwd` | observed `session_meta.payload.cwd` or `turn_context.payload.cwd` | observed legacy session `directory` or export metadata | session header `cwd` | Normalize to absolute path when possible | +| `repo_root` | optional | derive from `cwd` when stable | derive from `cwd` | derive from `directory` | derive from `cwd` | Derived field | +| `repo_remote` | optional | derive from repo state if available | observed nested `git` payload when available | derive from repo state if directory exists | derive from repo state if available | Derived field | +| `branch` | optional | transcript `gitBranch` | observed nested `git` payload when available | derive from repo state or export metadata | derive from repo state if available | Do not make branch mandatory | +| `commit_sha` | optional | derive from repo state if available | observed nested `git` payload when available | derive from repo state if available | derive from repo state if available | Derived field | +| `permission_mode` | optional | observed transcript `permissionMode` | none | none | none | Claude-specific but useful | +| `approval_policy` | optional | none | observed `turn_context.payload.approval_policy` | use export/server metadata if present | none | Codex-specific today | +| `sandbox_policy` | optional | none | observed `turn_context.payload.sandbox_policy` | use export/server metadata if present | none | Codex-specific today | +| `provider` | optional | typically `anthropic` | observed `session_meta.payload.model_provider` | from stats/export/session metadata | from runtime metadata if present | Keep provider separate from model | +| `model` | optional | docs `SessionStart.model` | observed `turn_context.payload.model` | from stats/export/session metadata | from runtime metadata if present | Session-level default model | +| `started_at` 
| optional | hook start timestamp or first transcript event | `session_meta.timestamp` | session/export `time` or first message time | session header `timestamp` | Prefer source-native start time | +| `ended_at` | optional | `SessionEnd` time or last transcript event | last terminal event | last message/export end time | last session file line time | Derived when not explicitly emitted | +| `completion_status` | optional | derived from `SessionEnd.reason` | derived from terminal event outcome | derived from export/session status | derived from session outcome | `completed`, `failed`, `interrupted`, `cancelled`, `unknown` | +| `end_reason` | optional | docs `SessionEnd.reason` | terminal event/error payload | export/session status reason if present | hook/session reason if available | Preserve raw reason text | --- ## Prompt Fields -| Canonical field | Requirement | Claude Code | Codex | OpenCode | OpenClaw | Notes | -|---|---|---|---|---|---|---| -| `prompt_id` | required | derive from `session_id` + prompt order or source-native message UUID | derive from source event order unless native ID exists | DB/export message ID when present, otherwise derive | derive from session file line order | Must be deterministic | -| `prompt_text` | required | hook `user_prompt` or transcript user text blocks | observed `event_msg.payload.message` when `payload.type = user_message` | user message text parts from DB/export/session JSON | `role = user` text blocks | Raw prompt text before sanitization | -| `prompt_hash` | optional | derive from `prompt_text` | derive | derive | derive | Useful for dedupe and privacy-safe analytics | -| `prompt_kind` | required | normalization classifier over hook/transcript text | normalization classifier over `event_msg` payloads | normalization classifier over message parts | normalization classifier over user blocks | Do not hardcode per-platform logic downstream | -| `is_actionable` | required | derived from `prompt_kind` + actionable-query 
filter | derived | derived | derived | Must be written at normalization time | -| `prompt_index` | optional | transcript order | event order within session | message order | file line order | Useful for replay/debugging | -| `parent_prompt_id` | optional | continuation chains when detectable | turn/thread ancestry if exposed | conversation ancestry if exposed | follow-up linkage if exposed | Usually absent; keep optional | -| `source_message_id` | optional | transcript `uuid` where present | native message/item ID if exposed | DB/export message ID | native message ID if introduced later | Preserve source-native IDs separately | +| Canonical field | Requirement | Claude Code | Codex | OpenCode | OpenClaw | Notes | +| ------------------- | ----------- | --------------------------------------------------------------------- | ----------------------------------------------------------------------- | --------------------------------------------------- | ----------------------------------------- | --------------------------------------------- | +| `prompt_id` | required | derive from `session_id` + prompt order or source-native message UUID | derive from source event order unless native ID exists | DB/export message ID when present, otherwise derive | derive from session file line order | Must be deterministic | +| `prompt_text` | required | hook `user_prompt` or transcript user text blocks | observed `event_msg.payload.message` when `payload.type = user_message` | user message text parts from DB/export/session JSON | `role = user` text blocks | Raw prompt text before sanitization | +| `prompt_hash` | optional | derive from `prompt_text` | derive | derive | derive | Useful for dedupe and privacy-safe analytics | +| `prompt_kind` | required | normalization classifier over hook/transcript text | normalization classifier over `event_msg` payloads | normalization classifier over message parts | normalization classifier over user blocks | Do not hardcode per-platform logic 
downstream | +| `is_actionable` | required | derived from `prompt_kind` + actionable-query filter | derived | derived | derived | Must be written at normalization time | +| `prompt_index` | optional | transcript order | event order within session | message order | file line order | Useful for replay/debugging | +| `parent_prompt_id` | optional | continuation chains when detectable | turn/thread ancestry if exposed | conversation ancestry if exposed | follow-up linkage if exposed | Usually absent; keep optional | +| `source_message_id` | optional | transcript `uuid` where present | native message/item ID if exposed | DB/export message ID | native message ID if introduced later | Preserve source-native IDs separately | --- ## Skill Invocation and Execution Fields -| Canonical field | Requirement | Claude Code | Codex | OpenCode | OpenClaw | Notes | -|---|---|---|---|---|---|---| -| `skill_invocation_id` | required | derive from session + tool call identity + skill | derive from source event/item identity + skill | derive from message/tool call identity + skill | derive from `toolCall.id` + skill | Deterministic synthetic ID is acceptable | -| `matched_prompt_id` | required | latest actionable prompt in session when invocation occurs | user_message event matched to invocation window | user message matched to tool/message window | latest actionable user message before invocation | Required so skill analytics stop using all-query denominators | -| `skill_name` | required when skill known | `Skill` tool arg or `SKILL.md` parent dir | source-native explicit skill signal when available; otherwise only after validated mapping | `SKILL.md` parent dir or explicit tool arg | `SKILL.md` parent dir or explicit tool arg | Avoid text-mention-only names unless `invocation_mode = inferred` | -| `skill_path` | optional | `Read.file_path` for `SKILL.md` | source path when available | source path when available | `Read.file_path` for `SKILL.md` | May be synthetic for wrappers | -| 
`skill_version_hash` | optional | derive from file contents when available | derive if path/file is available | derive if source export includes file access | derive if file path is available | Not required for first refactor | -| `invocation_mode` | required | `explicit` for `Skill` tool use, `implicit` for `SKILL.md` read, `repaired` for repair overlay | source-specific mapping after rollout parser rewrite | source-specific mapping after export/server integration | `implicit` from `SKILL.md` read, `inferred` only with high-signal evidence | See confidence rules above | -| `triggered` | required | true only for actual/validated invocation, false for browsing or negative checks | true only for validated skill use | true only for validated skill use | true only for validated skill use | Do not overload with “mentioned” | -| `confidence` | required | derived from invocation mode and evidence strength | derived | derived | derived | Numeric, 0.0-1.0 | -| `tool_name` | optional | hook/transcript tool name | response/function call item name | tool_use/function name | `toolCall.name` / `toolUse.name` | Useful for evidence pages | -| `tool_call_id` | optional | source-native tool call ID when available | function call ID or item ID | tool call ID if present | `toolCall.id` | Important for stable invocation IDs | -| `tool_calls_json` | optional | session-level parse output | session-level parse output | session-level parse output | session-level parse output | Use for execution fact records, not prompt records | -| `total_tool_calls` | optional | transcript metrics | rollout/wrapper metrics | DB/export metrics | session-file metrics | Execution fact record | -| `assistant_turns` | optional | transcript metrics | turn count | message/turn count | assistant message count | Execution fact record | -| `errors_encountered` | optional | transcript/hook metrics | terminal/error events | tool result / export errors | `toolResult.isError` | Execution fact record | -| `input_tokens` 
| optional | hook/session metrics when available | usage/token_count payloads | stats/export/session metrics | runtime metadata if present | Execution fact record | -| `output_tokens` | optional | hook/session metrics when available | usage/token_count payloads | stats/export/session metrics | runtime metadata if present | Execution fact record | -| `duration_ms` | optional | derived from start/end timestamps | derived from session timeline | derived from message/export timestamps | derived from session timeline | Derived field | +| Canonical field | Requirement | Claude Code | Codex | OpenCode | OpenClaw | Notes | +| --------------------- | ------------------------- | ---------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ | ------------------------------------------------------- | -------------------------------------------------------------------------- | ----------------------------------------------------------------- | +| `skill_invocation_id` | required | derive from session + tool call identity + skill | derive from source event/item identity + skill | derive from message/tool call identity + skill | derive from `toolCall.id` + skill | Deterministic synthetic ID is acceptable | +| `matched_prompt_id` | required | latest actionable prompt in session when invocation occurs | user_message event matched to invocation window | user message matched to tool/message window | latest actionable user message before invocation | Required so skill analytics stop using all-query denominators | +| `skill_name` | required when skill known | `Skill` tool arg or `SKILL.md` parent dir | source-native explicit skill signal when available; otherwise only after validated mapping | `SKILL.md` parent dir or explicit tool arg | `SKILL.md` parent dir or explicit tool arg | Avoid text-mention-only names unless `invocation_mode = inferred` | +| 
`skill_path` | optional | `Read.file_path` for `SKILL.md` | source path when available | source path when available | `Read.file_path` for `SKILL.md` | May be synthetic for wrappers | +| `skill_version_hash` | optional | derive from file contents when available | derive if path/file is available | derive if source export includes file access | derive if file path is available | Not required for first refactor | +| `invocation_mode` | required | `explicit` for `Skill` tool use, `implicit` for `SKILL.md` read, `repaired` for repair overlay | source-specific mapping after rollout parser rewrite | source-specific mapping after export/server integration | `implicit` from `SKILL.md` read, `inferred` only with high-signal evidence | See confidence rules above | +| `triggered` | required | true only for actual/validated invocation, false for browsing or negative checks | true only for validated skill use | true only for validated skill use | true only for validated skill use | Do not overload with “mentioned” | +| `confidence` | required | derived from invocation mode and evidence strength | derived | derived | derived | Numeric, 0.0-1.0 | +| `tool_name` | optional | hook/transcript tool name | response/function call item name | tool_use/function name | `toolCall.name` / `toolUse.name` | Useful for evidence pages | +| `tool_call_id` | optional | source-native tool call ID when available | function call ID or item ID | tool call ID if present | `toolCall.id` | Important for stable invocation IDs | +| `tool_calls_json` | optional | session-level parse output | session-level parse output | session-level parse output | session-level parse output | Use for execution fact records, not prompt records | +| `total_tool_calls` | optional | transcript metrics | rollout/wrapper metrics | DB/export metrics | session-file metrics | Execution fact record | +| `assistant_turns` | optional | transcript metrics | turn count | message/turn count | assistant message count | Execution fact 
record | +| `errors_encountered` | optional | transcript/hook metrics | terminal/error events | tool result / export errors | `toolResult.isError` | Execution fact record | +| `input_tokens` | optional | hook/session metrics when available | usage/token_count payloads | stats/export/session metrics | runtime metadata if present | Execution fact record | +| `output_tokens` | optional | hook/session metrics when available | usage/token_count payloads | stats/export/session metrics | runtime metadata if present | Execution fact record | +| `duration_ms` | optional | derived from start/end timestamps | derived from session timeline | derived from message/export timestamps | derived from session timeline | Derived field | --- diff --git a/docs/exec-plans/scope-expansion-plan.md b/docs/exec-plans/scope-expansion-plan.md index 697c2e31..6c83401b 100644 --- a/docs/exec-plans/scope-expansion-plan.md +++ b/docs/exec-plans/scope-expansion-plan.md @@ -12,15 +12,15 @@ selftune currently evolves only the description block in SKILL.md — the text b ## Team Structure (7 agents in parallel) -| Agent | Name | Type | Workstream | Recs | -|-------|------|------|-----------|------| -| 1 | `foundation` | Engineer | Foundation types + shared utils | Prereqs for all | -| 2 | `body-evolve` | Engineer | Skill body evolution pipeline | 1, 6, 8 | -| 3 | `grade baseline` | Engineer | Baseline comparison system | 2, 9 | -| 4 | `token-pareto` | Engineer | Token efficiency + Pareto expansion | 3 | -| 5 | `eval unit-test` | Engineer | Skill-level unit test framework | 5 | -| 6 | `eval composability` | Engineer | Multi-skill composability analysis | 7 | -| 7 | `skillsbench` | Engineer | SkillsBench task corpus importer | 10 | +| Agent | Name | Type | Workstream | Recs | +| ----- | -------------------- | -------- | ----------------------------------- | --------------- | +| 1 | `foundation` | Engineer | Foundation types + shared utils | Prereqs for all | +| 2 | `body-evolve` | Engineer | Skill body 
evolution pipeline | 1, 6, 8 | +| 3 | `grade baseline` | Engineer | Baseline comparison system | 2, 9 | +| 4 | `token-pareto` | Engineer | Token efficiency + Pareto expansion | 3 | +| 5 | `eval unit-test` | Engineer | Skill-level unit test framework | 5 | +| 6 | `eval composability` | Engineer | Multi-skill composability analysis | 7 | +| 7 | `skillsbench` | Engineer | SkillsBench task corpus importer | 10 | Agent 1 (`foundation`) runs first and unblocks agents 2-7. Agents 2-7 run in parallel after foundation completes. @@ -33,6 +33,7 @@ Agent 1 (`foundation`) runs first and unblocks agents 2-7. Agents 2-7 run in par The `buildTriggerCheckPrompt()` and `parseTriggerResponse()` functions in `cli/selftune/evolution/validate-proposal.ts` must be accessible to `eval/` modules. The architecture linter forbids `eval/` → `evolution/` imports. **File:** `cli/selftune/utils/trigger-check.ts` (new) + - Move `buildTriggerCheckPrompt()` from `validate-proposal.ts:33-43` - Move `parseTriggerResponse()` from `validate-proposal.ts:50+` - Update `validate-proposal.ts` to import from `../utils/trigger-check.js` @@ -40,6 +41,7 @@ The `buildTriggerCheckPrompt()` and `parseTriggerResponse()` functions in `cli/s ### 1b. Add `parseSkillSections()` and `replaceSection()` to deploy-proposal.ts **File:** `cli/selftune/evolution/deploy-proposal.ts` (modify) + - Add `parseSkillSections(content: string): SkillSections` — splits SKILL.md into named parts (frontmatter, title, description, workflow routing, remaining body) - Add `replaceSection(content, sectionName, newContent): string` — replaces a `## Section` block - Add `replaceBody(currentContent, proposedBody): string` — replaces entire body below frontmatter @@ -48,6 +50,7 @@ The `buildTriggerCheckPrompt()` and `parseTriggerResponse()` functions in `cli/s ### 1c. 
Add `modelFlag` to `callLlm()` **File:** `cli/selftune/utils/llm-call.ts` (modify) + - Add optional `modelFlag?: string` parameter to `callLlm()` and `callViaAgent()` - When set and agent is `claude`, append `--model ${modelFlag}` to subprocess args - Fully backward compatible — all existing callers pass no modelFlag @@ -112,7 +115,11 @@ export interface BaselineResult { } // Skill unit tests -export type AssertionType = "output_contains" | "output_matches_regex" | "tool_called" | "trigger_check"; +export type AssertionType = + | "output_contains" + | "output_matches_regex" + | "tool_called" + | "trigger_check"; export interface SkillAssertion { type: AssertionType; @@ -135,7 +142,12 @@ export interface UnitTestResult { test_id: string; overall_passed: boolean; trigger_passed: boolean; - assertion_results: Array<{ type: AssertionType; value: string; passed: boolean; evidence: string }>; + assertion_results: Array<{ + type: AssertionType; + value: string; + passed: boolean; + evidence: string; + }>; duration_ms: number; } @@ -179,12 +191,14 @@ export interface SkillsBenchTask { ### 1e. Populate token data in transcript parser **File:** `cli/selftune/utils/transcript.ts` (modify) + - Add `extractTokenUsage(transcriptPath): { input: number; output: number }` that sums `usage.input_tokens` and `usage.output_tokens` from Claude transcript JSONL entries - Call from `parseTranscript()` to populate the existing optional `input_tokens`/`output_tokens` fields in `SessionTelemetryRecord` ### 1f. 
Update architecture linter **File:** `lint-architecture.ts` (modify) + - Add `EVAL_FILES` set with new eval modules: `baseline.ts`, `composability.ts`, `unit-test.ts`, `import.ts` - Add `EVAL_FORBIDDEN` list: same as `CONTRIBUTE_FORBIDDEN` (no hooks/ingestors/grading/evolution/monitoring imports) - Add new evolution files to `EVOLUTION_FILES`: `propose-routing.ts`, `propose-body.ts`, `validate-body.ts`, `validate-routing.ts`, `refine-body.ts`, `evolve-body.ts` @@ -204,6 +218,7 @@ export interface SkillsBenchTask { ### 2a. Routing table proposal generation **File:** `cli/selftune/evolution/propose-routing.ts` (new) + - `ROUTING_PROPOSER_SYSTEM` prompt instructing LLM to optimize the `## Workflow Routing` table - `buildRoutingProposalPrompt(currentRouting, fullSkillContent, failurePatterns, missedQueries, skillName)` - `generateRoutingProposal()` → `BodyEvolutionProposal` with `evolution_target: "routing_table"` @@ -211,6 +226,7 @@ export interface SkillsBenchTask { ### 2b. Routing table validation **File:** `cli/selftune/evolution/validate-routing.ts` (new) + - Reuses `buildTriggerCheckPrompt` from `utils/trigger-check.ts` but passes routing table as context - Structural check: valid markdown table syntax with `| Trigger | Workflow |` columns - Same before/after comparison as `validateProposal()` → `BodyValidationResult` @@ -218,6 +234,7 @@ export interface SkillsBenchTask { ### 2c. Full body proposal generation (upskill port) **File:** `cli/selftune/evolution/propose-body.ts` (new) + - `BODY_GENERATOR_SYSTEM` — teacher LLM generates entire SKILL.md body - `buildBodyGenerationPrompt(currentContent, failurePatterns, missedQueries, skillName, fewShotExamples?)` - `generateBodyProposal(currentContent, failurePatterns, missedQueries, skillName, skillPath, teacherAgent, modelFlag?)` → `BodyEvolutionProposal` @@ -225,6 +242,7 @@ export interface SkillsBenchTask { ### 2d. 
Full body validation (3-gate) **File:** `cli/selftune/evolution/validate-body.ts` (new) + - **Gate 1 (structural):** Pure code — YAML frontmatter present, `# Title` exists, `## Workflow Routing` preserved if original had it. No LLM. - **Gate 2 (trigger accuracy):** Student model YES/NO per eval entry on extracted description. Reuses `buildTriggerCheckPrompt` from shared utils. - **Gate 3 (quality):** Student model rates body clarity/completeness 0.0-1.0. @@ -233,12 +251,14 @@ export interface SkillsBenchTask { ### 2e. Body refinement (upskill refine loop port) **File:** `cli/selftune/evolution/refine-body.ts` (new) + - `BODY_REFINER_SYSTEM` — takes failure feedback, asks teacher to revise specific sections - `refineBodyProposal(currentProposal, failureFeedback, validationFailures, qualityScore, teacherAgent, modelFlag?)` → `BodyEvolutionProposal` ### 2f. Body evolution orchestrator **File:** `cli/selftune/evolution/evolve-body.ts` (new, CLI command: `evolve body`) + - `EvolveBodyDeps` interface (dependency injection matching `evolve.ts` pattern) - `EvolveBodyOptions` with `target`, `teacherAgent`, `studentAgent`, `taskDescription`, `fewShotPaths` - Orchestrator loop: @@ -256,6 +276,7 @@ export interface SkillsBenchTask { ### 2g. CLI command routing **File:** `cli/selftune/index.ts` (modify) + - Add `case "evolve body"` routing to `evolve-body.ts` - Flags: `--skill`, `--skill-path`, `--target routing_table|full_body`, `--teacher-agent`, `--student-agent`, `--teacher-model`, `--student-model`, `--dry-run`, `--task-description`, `--few-shot` @@ -277,6 +298,7 @@ export interface SkillsBenchTask { ### 3a. Baseline measurement module **File:** `cli/selftune/eval/baseline.ts` (new) + - `measureBaseline(evalSet, skillDescription, agent)` → `BaselineResult` - Runs trigger check against EMPTY string description (no-skill baseline) - Runs trigger check against current description (with-skill) @@ -287,6 +309,7 @@ export interface SkillsBenchTask { ### 3b. 
Wire baseline into evolve command **File:** `cli/selftune/evolution/evolve.ts` (modify) + - Add `--with-baseline` flag - When enabled: call `measureBaseline()` before deploying - Gate deployment on `lift > 0.05` — if skill doesn't add value over no-skill, don't evolve it @@ -295,6 +318,7 @@ export interface SkillsBenchTask { ### 3c. Standalone baseline CLI command **File:** `cli/selftune/index.ts` (modify) + - Add `case "grade baseline"` routing - `selftune grade baseline --skill --skill-path [--agent claude]` @@ -311,6 +335,7 @@ export interface SkillsBenchTask { ### 4a. Token efficiency scoring **File:** `cli/selftune/evolution/pareto.ts` (modify) + - Add `computeTokenEfficiencyScore(skillName, telemetry: SessionTelemetryRecord[]): number` - Finds sessions WITH skill (skill in `skills_triggered[]`) vs without - Computes avg total tokens for each group @@ -320,6 +345,7 @@ export interface SkillsBenchTask { ### 4b. Extend Pareto dominance to 5 dimensions **File:** `cli/selftune/evolution/pareto.ts` (modify) + - Extend `dominates()` to accept optional `token_efficiency_score` on candidates - When present, adds a 5th dimension to Pareto comparison - `computeParetoFrontier()` uses it if available @@ -328,6 +354,7 @@ export interface SkillsBenchTask { ### 4c. Wire into evolve orchestrator **File:** `cli/selftune/evolution/evolve.ts` (modify) + - Add `--token-efficiency` flag - When enabled and Pareto mode active: compute token efficiency per candidate, pass to Pareto functions - Log token metrics in audit entry details @@ -346,6 +373,7 @@ export interface SkillsBenchTask { ### 5a. Unit test runner **File:** `cli/selftune/eval/unit-test.ts` (new) + - `loadUnitTests(testsPath: string): SkillUnitTest[]` — reads JSON file - `runUnitTest(test, skillDescription, agent): UnitTestResult` - `trigger_check` assertions: use `buildTriggerCheckPrompt` from shared utils + `callLlm` @@ -357,6 +385,7 @@ export interface SkillsBenchTask { ### 5b. 
Unit test generator **File:** `cli/selftune/eval/generate-unit-tests.ts` (new) + - `generateUnitTests(skillName, skillPath, evalSet, agent): SkillUnitTest[]` - LLM generates test cases from skill content + eval failures (upskill pattern) - Few-shot prompt with example test cases @@ -365,6 +394,7 @@ export interface SkillsBenchTask { ### 5c. CLI command **File:** `cli/selftune/index.ts` (modify) + - `selftune eval unit-test --skill --tests [--run-agent] [--generate]` - `--generate` flag creates tests from skill content; without it, runs existing tests @@ -382,6 +412,7 @@ export interface SkillsBenchTask { ### 6a. Composability analyzer **File:** `cli/selftune/eval/composability.ts` (new) + - `analyzeComposability(skillName, telemetry: SessionTelemetryRecord[], window?): ComposabilityReport` - Filter sessions where `skills_triggered` includes `skillName` - For each co-occurring skill: compute avg `errors_encountered` with both vs alone @@ -392,6 +423,7 @@ export interface SkillsBenchTask { ### 6b. CLI command **File:** `cli/selftune/index.ts` (modify) + - `selftune eval composability --skill [--window N]` - Reads `session_telemetry_log.jsonl`, calls `analyzeComposability()`, prints report @@ -408,6 +440,7 @@ export interface SkillsBenchTask { ### 7a. SkillsBench task parser **File:** `cli/selftune/eval/import-skillsbench.ts` (new) + - `parseSkillsBenchDir(dirPath: string): SkillsBenchTask[]` - Reads `tasks/*/instruction.md` files - Extracts task description as query candidates @@ -419,6 +452,7 @@ export interface SkillsBenchTask { ### 7b. CLI command **File:** `cli/selftune/index.ts` (modify) + - `selftune eval import --dir --skill --output [--match-strategy exact|fuzzy]` ### 7c. 
Tests @@ -446,34 +480,34 @@ Phase 2 (all parallel): ## New Files Summary -| File | Workstream | Purpose | -|------|-----------|---------| -| `cli/selftune/utils/trigger-check.ts` | 1 | Shared trigger-check prompts | -| `cli/selftune/evolution/propose-routing.ts` | 2 | Routing table proposal LLM | -| `cli/selftune/evolution/validate-routing.ts` | 2 | Routing table validation | -| `cli/selftune/evolution/propose-body.ts` | 2 | Full body generation (teacher) | -| `cli/selftune/evolution/validate-body.ts` | 2 | 3-gate body validation (student) | -| `cli/selftune/evolution/refine-body.ts` | 2 | Iterative body refinement | -| `cli/selftune/evolution/evolve-body.ts` | 2 | Body evolution orchestrator | -| `cli/selftune/eval/baseline.ts` | 3 | No-skill baseline comparison | -| `cli/selftune/eval/unit-test.ts` | 5 | Skill unit test runner | -| `cli/selftune/eval/generate-unit-tests.ts` | 5 | Unit test auto-generation | -| `cli/selftune/eval/composability.ts` | 6 | Multi-skill conflict detection | -| `cli/selftune/eval/import-skillsbench.ts` | 7 | SkillsBench corpus importer | +| File | Workstream | Purpose | +| -------------------------------------------- | ---------- | -------------------------------- | +| `cli/selftune/utils/trigger-check.ts` | 1 | Shared trigger-check prompts | +| `cli/selftune/evolution/propose-routing.ts` | 2 | Routing table proposal LLM | +| `cli/selftune/evolution/validate-routing.ts` | 2 | Routing table validation | +| `cli/selftune/evolution/propose-body.ts` | 2 | Full body generation (teacher) | +| `cli/selftune/evolution/validate-body.ts` | 2 | 3-gate body validation (student) | +| `cli/selftune/evolution/refine-body.ts` | 2 | Iterative body refinement | +| `cli/selftune/evolution/evolve-body.ts` | 2 | Body evolution orchestrator | +| `cli/selftune/eval/baseline.ts` | 3 | No-skill baseline comparison | +| `cli/selftune/eval/unit-test.ts` | 5 | Skill unit test runner | +| `cli/selftune/eval/generate-unit-tests.ts` | 5 | Unit test auto-generation | 
+| `cli/selftune/eval/composability.ts` | 6 | Multi-skill conflict detection | +| `cli/selftune/eval/import-skillsbench.ts` | 7 | SkillsBench corpus importer | ## Modified Files Summary -| File | Workstream | Changes | -|------|-----------|---------| -| `cli/selftune/types.ts` | 1 | All new interfaces | -| `cli/selftune/utils/llm-call.ts` | 1 | `modelFlag` parameter | -| `cli/selftune/utils/transcript.ts` | 1 | Token extraction | -| `cli/selftune/evolution/deploy-proposal.ts` | 1 | `parseSkillSections`, `replaceSection`, `replaceBody` | -| `cli/selftune/evolution/validate-proposal.ts` | 1 | Extract trigger-check to shared util | -| `cli/selftune/evolution/pareto.ts` | 4 | Token efficiency dimension | -| `cli/selftune/evolution/evolve.ts` | 3, 4 | `--with-baseline`, `--token-efficiency` flags | -| `cli/selftune/index.ts` | All | 5 new command routes | -| `lint-architecture.ts` | 1 | Add `EVAL_FILES` + new evolution files | +| File | Workstream | Changes | +| --------------------------------------------- | ---------- | ----------------------------------------------------- | +| `cli/selftune/types.ts` | 1 | All new interfaces | +| `cli/selftune/utils/llm-call.ts` | 1 | `modelFlag` parameter | +| `cli/selftune/utils/transcript.ts` | 1 | Token extraction | +| `cli/selftune/evolution/deploy-proposal.ts` | 1 | `parseSkillSections`, `replaceSection`, `replaceBody` | +| `cli/selftune/evolution/validate-proposal.ts` | 1 | Extract trigger-check to shared util | +| `cli/selftune/evolution/pareto.ts` | 4 | Token efficiency dimension | +| `cli/selftune/evolution/evolve.ts` | 3, 4 | `--with-baseline`, `--token-efficiency` flags | +| `cli/selftune/index.ts` | All | 5 new command routes | +| `lint-architecture.ts` | 1 | Add `EVAL_FILES` + new evolution files | --- diff --git a/docs/exec-plans/tech-debt-tracker.md b/docs/exec-plans/tech-debt-tracker.md index 480e4185..dff169e2 100644 --- a/docs/exec-plans/tech-debt-tracker.md +++ b/docs/exec-plans/tech-debt-tracker.md @@ -4,26 +4,26 
@@ Track known technical debt with priority and ownership. -| ID | Description | Domain | Priority | Owner | Status | Created | Updated | -|----|-------------|--------|----------|-------|--------|---------|---------| -| TD-001 | Add CI pipeline (bun test + lint-architecture.ts) | Infra | High | — | Closed | 2026-02-28 | 2026-02-28 | -| TD-002 | Schema validation for all JSONL writers | Telemetry | High | — | Closed | 2026-02-28 | 2026-02-28 | -| TD-003 | Tests for hooks-to-evals.ts | Eval | Medium | — | Closed | 2026-02-28 | 2026-02-28 | -| TD-004 | Tests for grade-session.ts | Grading | Medium | — | Closed | 2026-02-28 | 2026-02-28 | -| TD-005 | Implement v0.3 Evolution module | Evolution | Low | — | Closed | 2026-02-28 | 2026-02-28 | -| TD-006 | Migrate Python to Bun/TypeScript | Infra | High | — | Closed | 2026-02-28 | 2026-02-28 | -| TD-007 | Wire deployProposal into evolve orchestrator. Note: module implementation (`deploy-proposal.ts`) is complete with tests, but is not yet imported/wired into `evolve.ts`. 
| Evolution | Medium | — | Open | 2026-02-28 | 2026-03-02 | -| TD-008 | End-to-end integration test with real LLM call | Evolution | Low | — | Open | 2026-02-28 | 2026-02-28 | -| TD-009 | Add evolution/monitoring to lint-architecture.ts import rules | Infra | Medium | — | Closed | 2026-02-28 | 2026-02-28 | -| TD-010 | `cli/selftune/utils/logging.ts` has no test file — violates golden-principles testing rule | Testing | Medium | — | Open | 2026-03-01 | 2026-03-01 | -| TD-011 | `cli/selftune/utils/seeded-random.ts` has no test file — violates golden-principles testing rule | Testing | Medium | — | Open | 2026-03-01 | 2026-03-01 | -| TD-012 | Dashboard server test (`tests/dashboard/dashboard-server.test.ts`) was flaky around legacy SSE `/api/events` behavior | Testing | Medium | — | Closed | 2026-03-03 | 2026-03-14 | -| TD-013 | Migrate badge/report endpoints (`/badge/:name`, `/report/:name`) from JSONL status path to SQLite-backed queries | Dashboard | Low | — | Open | 2026-03-17 | 2026-03-17 | -| TD-014 | Add `regression_detected` column to SQLite skill summaries — `deriveStatus()` currently uses only pass rate + check count | Dashboard | Medium | — | Open | 2026-03-17 | 2026-03-17 | -| TD-015 | Move `computeMonitoringSnapshot()` logic into SQLite materializer or query helper | Dashboard | Medium | — | Open | 2026-03-17 | 2026-03-17 | -| TD-016 | Wire SPA action buttons (watch/evolve/rollback) to `/api/actions/*` endpoints | Dashboard | Medium | — | Open | 2026-03-17 | 2026-03-17 | -| TD-017 | `readJsonl` fallback still exists in some modules for test paths — should migrate tests to use `_setTestDb()` injection pattern | Testing | Medium | — | Open | 2026-03-17 | 2026-03-17 | -| TD-018 | `contribute/bundle.ts` still has JSONL fallback for custom paths — should use SQLite exclusively | Data | Medium | — | Open | 2026-03-17 | 2026-03-17 | +| ID | Description | Domain | Priority | Owner | Status | Created | Updated | +| ------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------- | -------- | ----- | ------ | ---------- | ---------- | +| TD-001 | Add CI pipeline (bun test + lint-architecture.ts) | Infra | High | — | Closed | 2026-02-28 | 2026-02-28 | +| TD-002 | Schema validation for all JSONL writers | Telemetry | High | — | Closed | 2026-02-28 | 2026-02-28 | +| TD-003 | Tests for hooks-to-evals.ts | Eval | Medium | — | Closed | 2026-02-28 | 2026-02-28 | +| TD-004 | Tests for grade-session.ts | Grading | Medium | — | Closed | 2026-02-28 | 2026-02-28 | +| TD-005 | Implement v0.3 Evolution module | Evolution | Low | — | Closed | 2026-02-28 | 2026-02-28 | +| TD-006 | Migrate Python to Bun/TypeScript | Infra | High | — | Closed | 2026-02-28 | 2026-02-28 | +| TD-007 | Wire deployProposal into evolve orchestrator. Note: module implementation (`deploy-proposal.ts`) is complete with tests, but is not yet imported/wired into `evolve.ts`. 
| Evolution | Medium | — | Open | 2026-02-28 | 2026-03-02 | +| TD-008 | End-to-end integration test with real LLM call | Evolution | Low | — | Open | 2026-02-28 | 2026-02-28 | +| TD-009 | Add evolution/monitoring to lint-architecture.ts import rules | Infra | Medium | — | Closed | 2026-02-28 | 2026-02-28 | +| TD-010 | `cli/selftune/utils/logging.ts` has no test file — violates golden-principles testing rule | Testing | Medium | — | Open | 2026-03-01 | 2026-03-01 | +| TD-011 | `cli/selftune/utils/seeded-random.ts` has no test file — violates golden-principles testing rule | Testing | Medium | — | Open | 2026-03-01 | 2026-03-01 | +| TD-012 | Dashboard server test (`tests/dashboard/dashboard-server.test.ts`) was flaky around legacy SSE `/api/events` behavior | Testing | Medium | — | Closed | 2026-03-03 | 2026-03-14 | +| TD-013 | Migrate badge/report endpoints (`/badge/:name`, `/report/:name`) from JSONL status path to SQLite-backed queries | Dashboard | Low | — | Open | 2026-03-17 | 2026-03-17 | +| TD-014 | Add `regression_detected` column to SQLite skill summaries — `deriveStatus()` currently uses only pass rate + check count | Dashboard | Medium | — | Open | 2026-03-17 | 2026-03-17 | +| TD-015 | Move `computeMonitoringSnapshot()` logic into SQLite materializer or query helper | Dashboard | Medium | — | Open | 2026-03-17 | 2026-03-17 | +| TD-016 | Wire SPA action buttons (watch/evolve/rollback) to `/api/actions/*` endpoints | Dashboard | Medium | — | Open | 2026-03-17 | 2026-03-17 | +| TD-017 | `readJsonl` fallback still exists in some modules for test paths — should migrate tests to use `_setTestDb()` injection pattern | Testing | Medium | — | Open | 2026-03-17 | 2026-03-17 | +| TD-018 | `contribute/bundle.ts` still has JSONL fallback for custom paths — should use SQLite exclusively | Data | Medium | — | Open | 2026-03-17 | 2026-03-17 | ## Priority Definitions diff --git a/docs/golden-principles.md b/docs/golden-principles.md index 1e6ae1ea..d1ea729d 100644 --- 
a/docs/golden-principles.md +++ b/docs/golden-principles.md @@ -79,28 +79,28 @@ Opinionated mechanical rules that encode human taste for selftune. These go beyo 12. **Dependency injection for testability** Evolution modules accept injectable dependencies (`_deps` parameter) so tests avoid `mock.module` contamination. Real imports are the default; tests inject mocks. -1. **Pre-gates before LLM grading** +13. **Pre-gates before LLM grading** Deterministic checks (SKILL.md read, tools called, error count, session completed) run before the LLM grader. If all expectations resolve via pre-gates, the LLM call is skipped entirely. Pre-gate results are tagged with `source: "pre-gate"`. -2. **Graduated scores over binary pass/fail** +14. **Graduated scores over binary pass/fail** Every grading expectation carries a `score` (0.0-1.0) alongside the binary `passed` boolean. Summaries include `mean_score` and `score_std_dev`. Default: `score ?? (passed ? 1.0 : 0.0)`. -3. **Pareto frontier for multi-candidate selection** +15. **Pareto frontier for multi-candidate selection** When generating multiple proposal candidates, use Pareto dominance across invocation type dimensions (explicit, implicit, contextual, negative) to select the best candidate. Complementary candidates may be merged. All Pareto functions are pure — no I/O. ## Activation and Agent Rules 1. **Suggestions are advisory, never blocking** - Auto-activation hooks suggest commands but never block the user prompt. Fail-open design: if the hook errors, the session continues uninterrupted. + Auto-activation hooks suggest commands but never block the user prompt. Fail-open design: if the hook errors, the session continues uninterrupted. 2. **Evolution memory survives resets** - The 3-file memory system (`~/.selftune/memory/`) persists context, plans, and decisions across sessions. `decisions.md` is append-only so history is never lost. 
+ The 3-file memory system (`~/.selftune/memory/`) persists context, plans, and decisions across sessions. `decisions.md` is append-only so history is never lost. 3. **Guardrails protect active evolutions** - `evolution-guard.ts` blocks SKILL.md edits on monitored skills during active evolutions. Exit code 2 blocks with a message explaining why; never silent failure. + `evolution-guard.ts` blocks SKILL.md edits on monitored skills during active evolutions. Exit code 2 blocks with a message explaining why; never silent failure. 4. **Agents are pure markdown, cheap to create** - Specialized Claude Code agents (diagnosis-analyst, pattern-analyst, evolution-reviewer, integration-guide) are markdown files with focused single-purpose instructions. Prefer narrow, single-purpose agents over general-purpose ones. + Specialized Claude Code agents (diagnosis-analyst, pattern-analyst, evolution-reviewer, integration-guide) are markdown files with focused single-purpose instructions. Prefer narrow, single-purpose agents over general-purpose ones. 
## Anti-Patterns diff --git a/docs/integration-guide.md b/docs/integration-guide.md index 03d0b64b..751c378b 100644 --- a/docs/integration-guide.md +++ b/docs/integration-guide.md @@ -58,6 +58,7 @@ my-project/ **Template:** `templates/single-skill-settings.json` **What you get:** + - Prompt logging on every user query - Skill evaluation on every `Read` tool use - Session telemetry on session stop @@ -99,6 +100,7 @@ my-project/ **Template:** `templates/multi-skill-settings.json` **Differences from single-skill:** + - Includes `evolution-guard.ts` in `PreToolUse` hooks to protect active evolutions - Activation rules (`activation-rules.json`) control which suggestions fire - Each skill gets independent eval/grade/evolve cycles @@ -107,12 +109,12 @@ my-project/ selftune ships with four default activation rules (see `cli/selftune/activation-rules.ts`): -| Rule ID | Trigger | Suggestion | -|---------|---------|------------| -| `post-session-diagnostic` | >2 unmatched queries in session | `selftune last` | -| `grading-threshold-breach` | Session pass rate < 60% | `selftune evolve` | -| `stale-evolution` | No evolution in >7 days + pending false negatives | `selftune evolve` | -| `regression-detected` | Monitoring snapshot shows regression | `selftune evolve rollback` | +| Rule ID | Trigger | Suggestion | +| -------------------------- | ------------------------------------------------- | -------------------------- | +| `post-session-diagnostic` | >2 unmatched queries in session | `selftune last` | +| `grading-threshold-breach` | Session pass rate < 60% | `selftune evolve` | +| `stale-evolution` | No evolution in >7 days + pending false negatives | `selftune evolve` | +| `regression-detected` | Monitoring snapshot shows regression | `selftune evolve rollback` | Rules fire at most once per session (tracked via session state files in `~/.selftune/`). To disable a rule, set `"enabled": false` in your `activation-rules.json`. @@ -148,6 +150,7 @@ my-monorepo/ 4. 
Run `selftune doctor`. **Tips:** + - Run `selftune init` from the monorepo root, not from individual packages. - Skill paths are stored as absolute paths in telemetry, so cross-package analysis works. - Use `selftune status --skill ` to check per-skill metrics. @@ -175,6 +178,7 @@ selftune ingest codex --dir /path/to/codex/sessions ``` **Limitations:** + - No real-time hook-based telemetry (Codex has no hook system) - Eval and grading work the same way once sessions are ingested - Auto-activation suggestions are not available (no `UserPromptSubmit` hook) @@ -198,6 +202,7 @@ The default database path is `~/.local/share/opencode/opencode.db`. Override with `--db /path/to/opencode.db`. **Limitations:** + - Same as Codex: no real-time hooks, batch ingest only - Session format differs; selftune normalizes on import @@ -225,13 +230,13 @@ Use `--since 2026-02-01` to limit scope. Use `--dry-run` to preview. **Options:** -| Flag | Description | -|------|-------------| -| `--agents-dir ` | Override default `~/.openclaw/agents/` directory | -| `--since ` | Only ingest sessions modified after this date (YYYY-MM-DD) | -| `--dry-run` | Preview what would be ingested without writing to logs | -| `--force` | Re-ingest all sessions, ignoring the marker file | -| `--verbose` / `-v` | Show per-session progress during ingestion | +| Flag | Description | +| --------------------- | ---------------------------------------------------------- | +| `--agents-dir ` | Override default `~/.openclaw/agents/` directory | +| `--since ` | Only ingest sessions modified after this date (YYYY-MM-DD) | +| `--dry-run` | Preview what would be ingested without writing to logs | +| `--force` | Re-ingest all sessions, ignoring the marker file | +| `--verbose` / `-v` | Show per-session progress during ingestion | **Skill detection:** OpenClaw doesn't explicitly log skill triggers. 
selftune infers triggers by detecting `SKILL.md` file reads and matching tool call names @@ -254,12 +259,12 @@ selftune cron setup This registers 4 jobs with OpenClaw: -| Job | Schedule | Purpose | -|-----|----------|---------| -| `selftune-ingest` | Every 30 min | Ingest new sessions | -| `selftune-status` | Daily 8am | Health check, flag skills below 80% | +| Job | Schedule | Purpose | +| ----------------- | ----------------- | ------------------------------------------------- | +| `selftune-ingest` | Every 30 min | Ingest new sessions | +| `selftune-status` | Daily 8am | Health check, flag skills below 80% | | `selftune-evolve` | Weekly Sunday 3am | Full evolution pipeline on undertriggering skills | -| `selftune-watch` | Every 6 hours | Regression monitoring on recently evolved skills | +| `selftune-watch` | Every 6 hours | Regression monitoring on recently evolved skills | 1. Customize timezone: `selftune cron setup --tz America/New_York` 2. Preview without registering: `selftune cron setup --dry-run` @@ -288,6 +293,7 @@ Next agent turn uses improved skill description Each cron run uses an **isolated session** — no context pollution between runs. **Safety controls:** + - `--dry-run` before real deploys - <5% regression threshold on existing triggers - Auto-rollback via `selftune watch --auto-rollback` @@ -296,6 +302,7 @@ Each cron run uses an **isolated session** — no context pollution between runs - Manual override: `selftune evolve rollback --skill ` at any time **Limitations:** + - Each cron run costs tokens (full LLM session, ~5K tokens estimated) - Cron tools may be blocked in Docker sandbox mode (OpenClaw issue #29921) - Newly created cron jobs may not fire until Gateway restart (known OpenClaw bug) @@ -334,6 +341,7 @@ All agents produce the same JSONL log format (`session_telemetry_log.jsonl`, record identifies the originating agent. **Tips:** + - Use `selftune status` to see aggregated metrics across agents. 
- Grading and evolution work on the merged dataset. - Keep `~/.selftune/config.json` agent-specific on each machine. @@ -344,16 +352,17 @@ record identifies the originating agent. selftune uses Claude Code hooks for real-time telemetry. Here is the full hook chain: -| Hook Event | Script | Purpose | -|-----------|--------|---------| -| `UserPromptSubmit` | `prompt-log.ts` | Log every user query to `all_queries_log.jsonl` | -| `UserPromptSubmit` | `auto-activate.ts` | Evaluate activation rules and show suggestions | -| `PreToolUse` (Write/Edit) | `skill-change-guard.ts` | Prevent unreviewed changes to SKILL.md files | -| `PreToolUse` (Write/Edit) | `evolution-guard.ts` | Block changes that conflict with active evolutions | -| `PostToolUse` (Read) | `skill-eval.ts` | Track which skills are triggered by queries | -| `Stop` | `session-stop.ts` | Capture end-of-session telemetry | +| Hook Event | Script | Purpose | +| ------------------------- | ----------------------- | -------------------------------------------------- | +| `UserPromptSubmit` | `prompt-log.ts` | Log every user query to `all_queries_log.jsonl` | +| `UserPromptSubmit` | `auto-activate.ts` | Evaluate activation rules and show suggestions | +| `PreToolUse` (Write/Edit) | `skill-change-guard.ts` | Prevent unreviewed changes to SKILL.md files | +| `PreToolUse` (Write/Edit) | `evolution-guard.ts` | Block changes that conflict with active evolutions | +| `PostToolUse` (Read) | `skill-eval.ts` | Track which skills are triggered by queries | +| `Stop` | `session-stop.ts` | Capture end-of-session telemetry | All hooks: + - Exit code 0 on success (non-blocking by design) - Write to stderr for advisory messages (shown to Claude as system messages) - Have 5-15 second timeouts to avoid blocking the agent @@ -367,12 +376,12 @@ All hooks: Run `selftune doctor` and address each failing check: -| Check | Fix | -|-------|-----| -| Config missing | Run `selftune init` | -| Hooks not installed | Merge the appropriate 
template into `~/.claude/settings.json` | -| Log directory missing | Run `selftune init --force` | -| Stale config | Run `selftune init --force` to regenerate | +| Check | Fix | +| --------------------- | ------------------------------------------------------------- | +| Config missing | Run `selftune init` | +| Hooks not installed | Merge the appropriate template into `~/.claude/settings.json` | +| Log directory missing | Run `selftune init --force` | +| Stale config | Run `selftune init --force` to regenerate | ### Hooks not firing @@ -432,6 +441,7 @@ Run `selftune doctor` and address each failing check: ### Workspace detection issues If `selftune init` detects the wrong workspace type: + 1. Use `--force` to reinitialize. 2. The detection scans for `SKILL.md` files and monorepo markers (`package.json` workspaces, `pnpm-workspace.yaml`, `lerna.json`). diff --git a/docs/launch-playbook-tracker.md b/docs/launch-playbook-tracker.md index 2b4eecc5..efcc773b 100644 --- a/docs/launch-playbook-tracker.md +++ b/docs/launch-playbook-tracker.md @@ -29,46 +29,46 @@ Manual actions for maximizing selftune's open-source impact. 
Check items off as ## Awesome List Submissions -| List | Category | One-liner | -|------|----------|-----------| -| [awesome-cli-apps](https://github.com/agarrharr/awesome-cli-apps) | Developer Tools | Self-improving skills CLI for AI agents | -| [awesome-bun](https://github.com/apvarun/awesome-bun) | CLI Tools | TypeScript CLI built on Bun | -| [awesome-typescript](https://github.com/dzharii/awesome-typescript) | CLI | CLI for AI skill improvement | -| [awesome-ai-agents](https://github.com/e2b-dev/awesome-ai-agents) | Developer Tools | Continuous improvement for agent skills | -| [awesome-claude](https://github.com/anthropics/anthropic-cookbook) | Tools | Self-improving skills for Claude Code | -| [awesome-devtools](https://github.com/moimikey/awesome-devtools) | CLI | Self-improving agent skills | -| [awesome-testing](https://github.com/TheJambo/awesome-testing) | Tools | Eval generation from real usage | -| [awesome-open-source](https://github.com/cornelius/awesome-open-source) | Tools | OSS self-improving skills for AI agents | -| [awesome-llm](https://github.com/Hannibal046/Awesome-LLM) | Tools | LLM agent skill tuning | +| List | Category | One-liner | +| ----------------------------------------------------------------------- | --------------- | --------------------------------------- | +| [awesome-cli-apps](https://github.com/agarrharr/awesome-cli-apps) | Developer Tools | Self-improving skills CLI for AI agents | +| [awesome-bun](https://github.com/apvarun/awesome-bun) | CLI Tools | TypeScript CLI built on Bun | +| [awesome-typescript](https://github.com/dzharii/awesome-typescript) | CLI | CLI for AI skill improvement | +| [awesome-ai-agents](https://github.com/e2b-dev/awesome-ai-agents) | Developer Tools | Continuous improvement for agent skills | +| [awesome-claude](https://github.com/anthropics/anthropic-cookbook) | Tools | Self-improving skills for Claude Code | +| [awesome-devtools](https://github.com/moimikey/awesome-devtools) | CLI | Self-improving 
agent skills | +| [awesome-testing](https://github.com/TheJambo/awesome-testing) | Tools | Eval generation from real usage | +| [awesome-open-source](https://github.com/cornelius/awesome-open-source) | Tools | OSS self-improving skills for AI agents | +| [awesome-llm](https://github.com/Hannibal046/Awesome-LLM) | Tools | LLM agent skill tuning | --- ## Newsletter Pitches -| Newsletter | URL | Pitch Angle | -|------------|-----|-------------| -| TLDR | https://tldr.tech/submit | AI agent skills that learn how you work — and fix themselves | -| Console.dev | https://console.dev/submit | Developer tool for AI skill improvement | -| Changelog | https://changelog.com/submit | OSS CLI that watches and improves agent skills | -| Hacker Newsletter | https://hackernewsletter.com | Show HN: selftune — skills that learn how you work | -| TypeScript Weekly | https://typescript-weekly.com | Bun + TypeScript CLI for agent eval | -| Node Weekly | https://nodeweekly.com/submit | CLI tool: observe, grade, and evolve AI skills | -| AI Weekly | https://aiweekly.co/submit | Continuous improvement loop for LLM agent skills | -| DevOps Weekly | https://devopsweekly.com | Self-improving AI agent skills — observe, evolve, deploy | -| The Pragmatic Engineer | https://newsletter.pragmaticengineer.com | Self-improving AI skills — a new category | +| Newsletter | URL | Pitch Angle | +| ---------------------- | ---------------------------------------- | ------------------------------------------------------------ | +| TLDR | https://tldr.tech/submit | AI agent skills that learn how you work — and fix themselves | +| Console.dev | https://console.dev/submit | Developer tool for AI skill improvement | +| Changelog | https://changelog.com/submit | OSS CLI that watches and improves agent skills | +| Hacker Newsletter | https://hackernewsletter.com | Show HN: selftune — skills that learn how you work | +| TypeScript Weekly | https://typescript-weekly.com | Bun + TypeScript CLI for agent eval | +| 
Node Weekly | https://nodeweekly.com/submit | CLI tool: observe, grade, and evolve AI skills | +| AI Weekly | https://aiweekly.co/submit | Continuous improvement loop for LLM agent skills | +| DevOps Weekly | https://devopsweekly.com | Self-improving AI agent skills — observe, evolve, deploy | +| The Pragmatic Engineer | https://newsletter.pragmaticengineer.com | Self-improving AI skills — a new category | --- ## Conference CFPs -| Conference | Relevance | URL | -|------------|-----------|-----| -| NodeConf EU | Bun/TypeScript CLI tooling | https://www.nodeconf.eu | -| AI Engineer Summit | Self-improving AI agent skills | https://www.ai.engineer | -| Open Source Summit | OSS project showcase | https://events.linuxfoundation.org | -| TypeScript Congress | TypeScript CLI architecture | https://typescriptcongress.com | -| DevOpsDays | Self-improving skills for AI agents | https://devopsdays.org | -| JSConf | JavaScript/TypeScript tooling | https://jsconf.com | +| Conference | Relevance | URL | +| ------------------- | ----------------------------------- | ---------------------------------- | +| NodeConf EU | Bun/TypeScript CLI tooling | https://www.nodeconf.eu | +| AI Engineer Summit | Self-improving AI agent skills | https://www.ai.engineer | +| Open Source Summit | OSS project showcase | https://events.linuxfoundation.org | +| TypeScript Congress | TypeScript CLI architecture | https://typescriptcongress.com | +| DevOpsDays | Self-improving skills for AI agents | https://devopsdays.org | +| JSConf | JavaScript/TypeScript tooling | https://jsconf.com | --- diff --git a/docs/operator-guide.md b/docs/operator-guide.md index dbbcb09f..a697e698 100644 --- a/docs/operator-guide.md +++ b/docs/operator-guide.md @@ -187,18 +187,18 @@ selftune init --enable-autonomy ### What gets scheduled -| Job | Purpose | -| --- | --- | -| `selftune sync` | refresh source-truth telemetry | -| `selftune sync && selftune status` | refresh local health readout | +| Job | Purpose | +| 
------------------------------------- | ----------------------------------- | +| `selftune sync` | refresh source-truth telemetry | +| `selftune sync && selftune status` | refresh local health readout | | `selftune orchestrate --max-skills 3` | run the autonomous improvement loop | ### Artifact locations -| Format | Location | -| --- | --- | -| cron | `~/.selftune/schedule/selftune.crontab` | -| launchd | `~/Library/LaunchAgents/com.selftune.*.plist` | +| Format | Location | +| ------- | -------------------------------------------------------- | +| cron | `~/.selftune/schedule/selftune.crontab` | +| launchd | `~/Library/LaunchAgents/com.selftune.*.plist` | | systemd | `~/.config/systemd/user/selftune-*.timer` and `.service` | ### OpenClaw-specific scheduling @@ -208,15 +208,15 @@ It is still supported, but it is not the primary product path. ## Important Local State -| Path | Meaning | -| --- | --- | -| `~/.selftune/config.json` | detected agent identity and bootstrap config | -| `~/.selftune/selftune.db` | SQLite operational database (direct-write + materialized from JSONL) | -| `~/.claude/session_telemetry_log.jsonl` | session-level telemetry | -| `~/.claude/all_queries_log.jsonl` | all observed user queries | -| `~/.claude/skill_usage_repaired.jsonl` | repaired/source-truth skill usage | -| `~/.claude/evolution_audit_log.jsonl` | proposal, deploy, and rollback audit trail | -| `~/.claude/orchestrate_runs.jsonl` | persisted orchestrate run reports and skill-level actions | +| Path | Meaning | +| --------------------------------------- | -------------------------------------------------------------------- | +| `~/.selftune/config.json` | detected agent identity and bootstrap config | +| `~/.selftune/selftune.db` | SQLite operational database (direct-write + materialized from JSONL) | +| `~/.claude/session_telemetry_log.jsonl` | session-level telemetry | +| `~/.claude/all_queries_log.jsonl` | all observed user queries | +| `~/.claude/skill_usage_repaired.jsonl` | 
repaired/source-truth skill usage | +| `~/.claude/evolution_audit_log.jsonl` | proposal, deploy, and rollback audit trail | +| `~/.claude/orchestrate_runs.jsonl` | persisted orchestrate run reports and skill-level actions | ## Dashboard Checks diff --git a/docs/product-specs/index.md b/docs/product-specs/index.md index 608bbe3e..cda02515 100644 --- a/docs/product-specs/index.md +++ b/docs/product-specs/index.md @@ -4,11 +4,11 @@ Registry of all product specifications. -| Spec | Status | Priority | Owner | -|------|--------|----------|-------| -| [PRD.md](../../PRD.md) | Current | P0 | WellDunDun | -| [ICP & GTM Strategy](../strategy/icp-gtm-strategy.md) | Current | P0 | Daniel Petro | -| [OpenClaw Integration](../strategy/openclaw-integration.md) | Current | P0 | Daniel Petro | +| Spec | Status | Priority | Owner | +| ----------------------------------------------------------- | ------- | -------- | ------------ | +| [PRD.md](../../PRD.md) | Current | P0 | WellDunDun | +| [ICP & GTM Strategy](../strategy/icp-gtm-strategy.md) | Current | P0 | Daniel Petro | +| [OpenClaw Integration](../strategy/openclaw-integration.md) | Current | P0 | Daniel Petro | ## Adding a New Spec diff --git a/package.json b/package.json index 8419f8f8..5c42c282 100644 --- a/package.json +++ b/package.json @@ -2,40 +2,43 @@ "name": "selftune", "version": "0.2.9", "description": "Self-improving skills CLI for AI agents", - "type": "module", + "keywords": [ + "agent", + "bun", + "claude-code", + "cli", + "codex", + "eval", + "evolution", + "grading", + "opencode", + "self-improving", + "selftune", + "skill", + "telemetry", + "typescript" + ], + "homepage": "https://github.com/selftune-dev/selftune#readme", + "bugs": { + "url": "https://github.com/selftune-dev/selftune/issues" + }, "license": "MIT", "author": "Daniel Petro", - "homepage": "https://github.com/selftune-dev/selftune#readme", "repository": { "type": "git", "url": "git+https://github.com/selftune-dev/selftune.git" }, - "bugs": { - 
"url": "https://github.com/selftune-dev/selftune/issues" - }, "funding": { "type": "github", "url": "https://github.com/sponsors/WellDunDun" }, - "keywords": [ - "selftune", - "skill", - "self-improving", - "claude-code", - "codex", - "opencode", - "eval", - "grading", - "evolution", - "cli", - "agent", - "telemetry", - "bun", - "typescript" - ], "bin": { "selftune": "bin/selftune.cjs" }, + "workspaces": [ + "packages/*", + "apps/*" + ], "files": [ "assets/", "bin/", @@ -49,12 +52,15 @@ "README.md", "CHANGELOG.md" ], + "type": "module", "scripts": { "dev": "sh -c 'if lsof -iTCP:7888 -sTCP:LISTEN >/dev/null 2>&1; then if curl -fsS http://localhost:7888/api/health | grep -q selftune-dashboard; then echo \"Using existing dashboard server on 7888\"; cd apps/local-dashboard && bunx vite --strictPort; else echo \"Port 7888 is occupied by a non-selftune service\"; exit 1; fi; else cd apps/local-dashboard && bun run dev; fi'", "dev:server": "bun --watch run cli/selftune/dashboard-server.ts --port 7888 --runtime-mode dev-server", "dev:dashboard": "bun run cli/selftune/index.ts dashboard --port 7888 --no-open", - "lint": "bunx @biomejs/biome check .", - "lint:fix": "bunx @biomejs/biome check --write .", + "lint": "bunx oxlint", + "lint:fix": "bunx oxlint --fix", + "format": "bunx oxfmt", + "format:check": "bunx oxfmt --check", "lint:arch": "bun run lint-architecture.ts", "test": "bun test tests/ packages/telemetry-contract/", "test:fast": "bun test $(find tests -name '*.test.ts' ! -name 'evolve.test.ts' ! -name 'integration.test.ts' ! -name 'dashboard-server.test.ts' ! 
-path '*/blog-proof/*')", @@ -65,18 +71,15 @@ "validate:subagents": "bun run scripts/validate-subagent-docs.ts", "prepublishOnly": "bun run sync-version && bun run build:dashboard", "typecheck:dashboard": "cd apps/local-dashboard && bunx tsc --noEmit", - "check": "bun run lint && bun run lint:arch && bun run typecheck:dashboard && bun run test", + "check": "bun run lint && bun run lint:arch && bun run format:check && bun run typecheck:dashboard && bun run test", "start": "bun run cli/selftune/index.ts --help" }, - "workspaces": [ - "packages/*", - "apps/*" - ], "dependencies": { "@selftune/telemetry-contract": "file:packages/telemetry-contract" }, "devDependencies": { - "@biomejs/biome": "^2.4.7", - "@types/bun": "^1.1.0" + "@types/bun": "^1.1.0", + "oxfmt": "^0.41.0", + "oxlint": "^1.56.0" } } diff --git a/packages/telemetry-contract/fixtures/golden.test.ts b/packages/telemetry-contract/fixtures/golden.test.ts index c2df37e5..da38210a 100644 --- a/packages/telemetry-contract/fixtures/golden.test.ts +++ b/packages/telemetry-contract/fixtures/golden.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from "bun:test"; import { readFileSync } from "node:fs"; import { join } from "node:path"; + import { CANONICAL_SCHEMA_VERSION } from "../src/types.js"; import { isCanonicalRecord } from "../src/validators.js"; diff --git a/packages/telemetry-contract/package.json b/packages/telemetry-contract/package.json index 32ec30f0..4b7e3888 100644 --- a/packages/telemetry-contract/package.json +++ b/packages/telemetry-contract/package.json @@ -3,7 +3,6 @@ "version": "1.0.0", "private": true, "description": "Canonical telemetry schema, types, and validators for selftune", - "type": "module", "license": "MIT", "author": "Daniel Petro", "repository": { @@ -11,6 +10,7 @@ "url": "git+https://github.com/selftune-dev/selftune.git", "directory": "packages/telemetry-contract" }, + "type": "module", "exports": { ".": "./index.ts", "./schemas": "./src/schemas.ts", diff --git 
a/packages/telemetry-contract/src/schemas.ts b/packages/telemetry-contract/src/schemas.ts index a99ac715..35ceb19d 100644 --- a/packages/telemetry-contract/src/schemas.ts +++ b/packages/telemetry-contract/src/schemas.ts @@ -1,4 +1,5 @@ import { z } from "zod"; + import { CANONICAL_CAPTURE_MODES, CANONICAL_COMPLETION_STATUSES, diff --git a/packages/telemetry-contract/tests/compatibility.test.ts b/packages/telemetry-contract/tests/compatibility.test.ts index b31e74ba..1ea61846 100644 --- a/packages/telemetry-contract/tests/compatibility.test.ts +++ b/packages/telemetry-contract/tests/compatibility.test.ts @@ -1,4 +1,5 @@ import { describe, expect, test } from "bun:test"; + import { completePush } from "../fixtures/complete-push.js"; import { evidenceOnlyPush } from "../fixtures/evidence-only-push.js"; import { partialPushNoSessions } from "../fixtures/partial-push-no-sessions.js"; diff --git a/packages/ui/README.md b/packages/ui/README.md index fa8094a1..741eeeff 100644 --- a/packages/ui/README.md +++ b/packages/ui/README.md @@ -17,16 +17,16 @@ Add as a workspace dependency: Import from subpath exports: ```tsx -import { Badge, Button, Card } from "@selftune/ui/primitives" -import { SkillHealthGrid, EvolutionTimeline } from "@selftune/ui/components" -import { cn, timeAgo, deriveStatus, STATUS_CONFIG } from "@selftune/ui/lib" -import type { SkillCard, EvolutionEntry } from "@selftune/ui/types" +import { Badge, Button, Card } from "@selftune/ui/primitives"; +import { SkillHealthGrid, EvolutionTimeline } from "@selftune/ui/components"; +import { cn, timeAgo, deriveStatus, STATUS_CONFIG } from "@selftune/ui/lib"; +import type { SkillCard, EvolutionEntry } from "@selftune/ui/types"; ``` Or import everything from the root: ```tsx -import { Badge, SkillHealthGrid, cn, type SkillCard } from "@selftune/ui" +import { Badge, SkillHealthGrid, cn, type SkillCard } from "@selftune/ui"; ``` ## Exports @@ -35,44 +35,44 @@ import { Badge, SkillHealthGrid, cn, type SkillCard } from 
"@selftune/ui" shadcn/ui components built on [@base-ui/react](https://base-ui.com/): -| Component | Source | -|-----------|--------| -| `Badge`, `badgeVariants` | badge.tsx | -| `Button`, `buttonVariants` | button.tsx | -| `Card`, `CardHeader`, `CardTitle`, `CardDescription`, `CardAction`, `CardContent`, `CardFooter` | card.tsx | -| `Checkbox` | checkbox.tsx | -| `Collapsible`, `CollapsibleTrigger`, `CollapsibleContent` | collapsible.tsx | -| `DropdownMenu`, `DropdownMenuTrigger`, `DropdownMenuContent`, `DropdownMenuItem`, ... | dropdown-menu.tsx | -| `Label` | label.tsx | -| `Select`, `SelectTrigger`, `SelectContent`, `SelectItem`, ... | select.tsx | -| `Table`, `TableHeader`, `TableBody`, `TableRow`, `TableHead`, `TableCell`, ... | table.tsx | -| `Tabs`, `TabsList`, `TabsTrigger`, `TabsContent` | tabs.tsx | -| `Tooltip`, `TooltipTrigger`, `TooltipContent`, `TooltipProvider` | tooltip.tsx | +| Component | Source | +| ----------------------------------------------------------------------------------------------- | ----------------- | +| `Badge`, `badgeVariants` | badge.tsx | +| `Button`, `buttonVariants` | button.tsx | +| `Card`, `CardHeader`, `CardTitle`, `CardDescription`, `CardAction`, `CardContent`, `CardFooter` | card.tsx | +| `Checkbox` | checkbox.tsx | +| `Collapsible`, `CollapsibleTrigger`, `CollapsibleContent` | collapsible.tsx | +| `DropdownMenu`, `DropdownMenuTrigger`, `DropdownMenuContent`, `DropdownMenuItem`, ... | dropdown-menu.tsx | +| `Label` | label.tsx | +| `Select`, `SelectTrigger`, `SelectContent`, `SelectItem`, ... | select.tsx | +| `Table`, `TableHeader`, `TableBody`, `TableRow`, `TableHead`, `TableCell`, ... | table.tsx | +| `Tabs`, `TabsList`, `TabsTrigger`, `TabsContent` | tabs.tsx | +| `Tooltip`, `TooltipTrigger`, `TooltipContent`, `TooltipProvider` | tooltip.tsx | ### Domain Components (`@selftune/ui/components`) Presentational components for selftune dashboard views. No data fetching, no routing — pass data and callbacks as props. 
-| Component | Description | -|-----------|-------------| -| `SkillHealthGrid` | Sortable/filterable data table with drag-and-drop, pagination, and view tabs. Accepts `renderSkillName` prop for custom routing. | -| `EvolutionTimeline` | Proposal lifecycle timeline grouped by proposal ID, with pass rate deltas. | -| `ActivityPanel` | Tabbed activity feed (pending proposals, timeline events, unmatched queries). | -| `EvidenceViewer` | Full evidence trail for a proposal — side-by-side diffs, validation results, iteration rounds. | -| `SectionCards` | Dashboard metric stat cards (skills count, pass rate, unmatched, sessions, etc.). | -| `OrchestrateRunsPanel` | Collapsible orchestrate run reports with per-skill action details. | -| `InfoTip` | Small info icon with tooltip, used to explain metrics. | +| Component | Description | +| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------- | +| `SkillHealthGrid` | Sortable/filterable data table with drag-and-drop, pagination, and view tabs. Accepts `renderSkillName` prop for custom routing. | +| `EvolutionTimeline` | Proposal lifecycle timeline grouped by proposal ID, with pass rate deltas. | +| `ActivityPanel` | Tabbed activity feed (pending proposals, timeline events, unmatched queries). | +| `EvidenceViewer` | Full evidence trail for a proposal — side-by-side diffs, validation results, iteration rounds. | +| `SectionCards` | Dashboard metric stat cards (skills count, pass rate, unmatched, sessions, etc.). | +| `OrchestrateRunsPanel` | Collapsible orchestrate run reports with per-skill action details. | +| `InfoTip` | Small info icon with tooltip, used to explain metrics. 
| ### Utilities (`@selftune/ui/lib`) -| Export | Description | -|--------|-------------| -| `cn(...inputs)` | Tailwind class merge utility (clsx + tailwind-merge) | -| `timeAgo(timestamp)` | Relative time string ("3h ago", "2d ago") | -| `formatRate(rate)` | Format 0-1 rate as percentage string ("85%") | -| `deriveStatus(passRate, checks)` | Derive `SkillHealthStatus` from pass rate and check count | +| Export | Description | +| -------------------------------- | --------------------------------------------------------------- | +| `cn(...inputs)` | Tailwind class merge utility (clsx + tailwind-merge) | +| `timeAgo(timestamp)` | Relative time string ("3h ago", "2d ago") | +| `formatRate(rate)` | Format 0-1 rate as percentage string ("85%") | +| `deriveStatus(passRate, checks)` | Derive `SkillHealthStatus` from pass rate and check count | | `sortByPassRateAndChecks(items)` | Sort skill cards by pass rate ascending, then checks descending | -| `STATUS_CONFIG` | Icon, variant, and label for each `SkillHealthStatus` value | +| `STATUS_CONFIG` | Icon, variant, and label for each `SkillHealthStatus` value | ### Types (`@selftune/ui/types`) @@ -107,6 +107,7 @@ This package uses Tailwind v4. 
The Vite plugin auto-scans imported workspace pac Required: `react`, `react-dom` Optional (only needed by specific components): + - `@dnd-kit/*` — SkillHealthGrid drag-and-drop - `@tanstack/react-table` — SkillHealthGrid table - `react-markdown` — EvidenceViewer markdown rendering diff --git a/packages/ui/package.json b/packages/ui/package.json index 4d27e324..71bf4eba 100644 --- a/packages/ui/package.json +++ b/packages/ui/package.json @@ -3,7 +3,6 @@ "version": "1.0.0", "private": true, "description": "Shared UI components for selftune dashboards", - "type": "module", "license": "MIT", "author": "Daniel Petro", "repository": { @@ -11,6 +10,7 @@ "url": "git+https://github.com/selftune-dev/selftune.git", "directory": "packages/ui" }, + "type": "module", "exports": { ".": "./index.ts", "./primitives": "./src/primitives/index.ts", @@ -30,13 +30,13 @@ "@types/react-dom": "^19.0.0" }, "peerDependencies": { - "react": "^19.0.0", - "react-dom": "^19.0.0", "@dnd-kit/core": "^6.0.0", "@dnd-kit/modifiers": "^9.0.0", "@dnd-kit/sortable": "^10.0.0", "@dnd-kit/utilities": "^3.0.0", "@tanstack/react-table": "^8.0.0", + "react": "^19.0.0", + "react-dom": "^19.0.0", "react-markdown": "^10.0.0", "recharts": "^2.0.0" }, diff --git a/packages/ui/src/components/ActivityTimeline.tsx b/packages/ui/src/components/ActivityTimeline.tsx index f726ef42..93c8725a 100644 --- a/packages/ui/src/components/ActivityTimeline.tsx +++ b/packages/ui/src/components/ActivityTimeline.tsx @@ -1,21 +1,11 @@ -import { Badge } from "../primitives/badge" -import { - Card, - CardContent, - CardDescription, - CardHeader, - CardTitle, -} from "../primitives/card" -import { Tabs, TabsContent, TabsList, TabsTrigger } from "../primitives/tabs" -import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "../primitives/tooltip" -import type { EvolutionEntry, PendingProposal, UnmatchedQuery } from "../types" -import { timeAgo } from "../lib/format" -import { - ClockIcon, - GitPullRequestArrowIcon, - 
SearchXIcon, - ActivityIcon, -} from "lucide-react" +import { ClockIcon, GitPullRequestArrowIcon, SearchXIcon, ActivityIcon } from "lucide-react"; + +import { timeAgo } from "../lib/format"; +import { Badge } from "../primitives/badge"; +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "../primitives/card"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "../primitives/tabs"; +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "../primitives/tooltip"; +import type { EvolutionEntry, PendingProposal, UnmatchedQuery } from "../types"; const ACTION_VARIANT: Record = { created: "outline", @@ -24,7 +14,7 @@ const ACTION_VARIANT: Record void + evolution: EvolutionEntry[]; + pendingProposals: PendingProposal[]; + unmatchedQueries: UnmatchedQuery[]; + onSelectProposal?: (skillName: string, proposalId: string) => void; }) { - const hasActivity = evolution.length > 0 || pendingProposals.length > 0 || unmatchedQueries.length > 0 + const hasActivity = + evolution.length > 0 || pendingProposals.length > 0 || unmatchedQueries.length > 0; if (!hasActivity) { return ( @@ -49,12 +40,10 @@ export function ActivityPanel({ -

- No recent activity -

+

No recent activity

- ) + ); } return ( @@ -80,7 +69,9 @@ export function ActivityPanel({ {pendingProposals.length > 0 && ( - }> + } + > {pendingProposals.length} @@ -97,7 +88,9 @@ export function ActivityPanel({ {unmatchedQueries.length > 0 && ( - }> + } + > {unmatchedQueries.length} @@ -116,7 +109,8 @@ export function ActivityPanel({ key={p.proposal_id} type="button" onClick={() => { - if (p.skill_name && onSelectProposal) onSelectProposal(p.skill_name, p.proposal_id) + if (p.skill_name && onSelectProposal) + onSelectProposal(p.skill_name, p.proposal_id); }} disabled={!p.skill_name || !onSelectProposal} className="flex w-full gap-3 rounded-md p-1.5 text-left transition-colors enabled:hover:bg-accent/40 disabled:cursor-default" @@ -124,7 +118,10 @@ export function ActivityPanel({
- + {p.action} @@ -149,20 +146,29 @@ export function ActivityPanel({ key={`${entry.proposal_id}-${i}`} type="button" onClick={() => { - if (entry.skill_name && onSelectProposal) onSelectProposal(entry.skill_name, entry.proposal_id) + if (entry.skill_name && onSelectProposal) + onSelectProposal(entry.skill_name, entry.proposal_id); }} disabled={!entry.skill_name || !onSelectProposal} className="flex w-full gap-3 rounded-md p-1.5 text-left transition-colors enabled:hover:bg-accent/40 disabled:cursor-default" > -
+
- + {entry.action} @@ -171,7 +177,8 @@ export function ActivityPanel({

{entry.details}

- {entry.skill_name ? `${entry.skill_name} · ` : ""}#{entry.proposal_id.slice(0, 8)} + {entry.skill_name ? `${entry.skill_name} · ` : ""}# + {entry.proposal_id.slice(0, 8)}
@@ -199,5 +206,5 @@ export function ActivityPanel({ - ) + ); } diff --git a/packages/ui/src/components/EvidenceViewer.tsx b/packages/ui/src/components/EvidenceViewer.tsx index 573a32f6..be9180b3 100644 --- a/packages/ui/src/components/EvidenceViewer.tsx +++ b/packages/ui/src/components/EvidenceViewer.tsx @@ -1,8 +1,3 @@ -import { useMemo, useState } from "react" -import { Badge } from "../primitives/badge" -import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card" -import type { EvidenceEntry, EvolutionEntry } from "../types" -import { formatRate, timeAgo } from "../lib/format" import { CheckCircleIcon, ChevronDownIcon, @@ -19,8 +14,14 @@ import { TrendingUpIcon, TrendingDownIcon, ListChecksIcon, -} from "lucide-react" -import Markdown from "react-markdown" +} from "lucide-react"; +import { useMemo, useState } from "react"; +import Markdown from "react-markdown"; + +import { formatRate, timeAgo } from "../lib/format"; +import { Badge } from "../primitives/badge"; +import { Card, CardContent, CardHeader, CardTitle } from "../primitives/card"; +import type { EvidenceEntry, EvolutionEntry } from "../types"; const ACTION_ICON: Record = { created: , @@ -28,7 +29,7 @@ const ACTION_ICON: Record = { deployed: , rejected: , rolled_back: , -} +}; const ACTION_VARIANT: Record = { created: "outline", @@ -36,34 +37,34 @@ const ACTION_VARIANT: Record; body: string } { - const match = text.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/) - if (!match) return { meta: {}, body: text } + const match = text.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/); + if (!match) return { meta: {}, body: text }; - const meta: Record = {} + const meta: Record = {}; for (const line of match[1].split("\n")) { - const idx = line.indexOf(":") + const idx = line.indexOf(":"); if (idx > 0) { - const key = line.slice(0, idx).trim() - const val = line.slice(idx + 1).trim() - if (key && val) meta[key] = val + const key = line.slice(0, idx).trim(); + const val = line.slice(idx + 
1).trim(); + if (key && val) meta[key] = val; } } - return { meta, body: match[2] } + return { meta, body: match[2] }; } function FrontmatterTable({ meta }: { meta: Record }) { - const entries = Object.entries(meta) - if (entries.length === 0) return null + const entries = Object.entries(meta); + if (entries.length === 0) return null; return (
@@ -74,20 +75,32 @@ function FrontmatterTable({ meta }: { meta: Record }) {
))}
- ) + ); } -function SkillContentBlock({ label, text, variant }: { label: string; text: string; variant: "original" | "proposed" }) { - const { meta, body } = parseFrontmatter(text) - const hasMeta = Object.keys(meta).length > 0 +function SkillContentBlock({ + label, + text, + variant, +}: { + label: string; + text: string; + variant: "original" | "proposed"; +}) { + const { meta, body } = parseFrontmatter(text); + const hasMeta = Object.keys(meta).length > 0; return (
- {label} + + {label} + {variant === "proposed" && ( - New + + New + )}
@@ -103,61 +116,73 @@ function SkillContentBlock({ label, text, variant }: { label: string; text: stri {body}
- ) + ); } /** Smart formatting for a single validation value */ function formatValidationValue(key: string, val: unknown): React.ReactNode { // Booleans if (typeof val === "boolean") { - return val - ? - : + return val ? ( + + ) : ( + + ); } // Numbers that look like rates (0-1 range, or key contains "rate"/"change") if (typeof val === "number") { - const isRate = key.includes("rate") || key.includes("change") || (val >= -1 && val <= 1 && key !== "count") + const isRate = + key.includes("rate") || key.includes("change") || (val >= -1 && val <= 1 && key !== "count"); if (isRate) { - const pct = (val * 100).toFixed(1) - const prefix = val > 0 && key.includes("change") ? "+" : "" - return {prefix}{pct}% + const pct = (val * 100).toFixed(1); + const prefix = val > 0 && key.includes("change") ? "+" : ""; + return ( + + {prefix} + {pct}% + + ); } - return {val} + return {val}; } // null/undefined - if (val === null || val === undefined) return -- + if (val === null || val === undefined) return --; // Strings - if (typeof val === "string") return {val} + if (typeof val === "string") return {val}; // Arrays — render as list of items if (Array.isArray(val)) { - if (val.length === 0) return none - return {val.length} entries + if (val.length === 0) return none; + return {val.length} entries; } // Objects - if (typeof val === "object") return 1 entry - return {String(val)} + if (typeof val === "object") return 1 entry; + return {String(val)}; } /** Render a per_entry_result row — handles both flat EvalEntry and nested { entry, before_pass, after_pass } */ function PerEntryResult({ entry }: { entry: Record }) { // Handle nested shape: { entry: { query, should_trigger }, before_pass, after_pass } - const nested = entry.entry as Record | undefined - const query = nested?.query ?? entry.query ?? entry.prompt ?? entry.input ?? entry.text - const shouldTrigger = nested?.should_trigger ?? entry.should_trigger - const invocationType = nested?.invocation_type ?? 
entry.invocation_type - const beforePass = entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline - const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result - const passed = entry.passed ?? entry.matched + const nested = entry.entry as Record | undefined; + const query = nested?.query ?? entry.query ?? entry.prompt ?? entry.input ?? entry.text; + const shouldTrigger = nested?.should_trigger ?? entry.should_trigger; + const invocationType = nested?.invocation_type ?? entry.invocation_type; + const beforePass = + entry.before_pass ?? entry.before ?? entry.original_triggered ?? entry.baseline; + const afterPass = entry.after_pass ?? entry.after ?? entry.triggered ?? entry.result; + const passed = entry.passed ?? entry.matched; // Determine icon: use after_pass for per_entry_results, passed for others - const isPass = typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null + const isPass = + typeof afterPass === "boolean" ? afterPass : typeof passed === "boolean" ? passed : null; return (
{isPass !== null ? ( - isPass - ? - : + isPass ? ( + + ) : ( + + ) ) : ( )} @@ -182,21 +207,32 @@ function PerEntryResult({ entry }: { entry: Record }) { )}
- ) + ); } function ValidationResults({ validation }: { validation: Record }) { - const { improved, before_pass_rate, after_pass_rate, net_change, regressions, new_passes, per_entry_results, ...rest } = validation - - const regressionsArr = Array.isArray(regressions) ? regressions : [] - const newPassesArr = Array.isArray(new_passes) ? new_passes : [] - const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : [] + const { + improved, + before_pass_rate, + after_pass_rate, + net_change, + regressions, + new_passes, + per_entry_results, + ...rest + } = validation; + + const regressionsArr = Array.isArray(regressions) ? regressions : []; + const newPassesArr = Array.isArray(new_passes) ? new_passes : []; + const perEntryArr = Array.isArray(per_entry_results) ? per_entry_results : []; return (

Validation Results - — Before/after comparison from eval tests + + — Before/after comparison from eval tests +

{/* Summary bar */} @@ -212,8 +248,11 @@ function ValidationResults({ validation }: { validation: Record )} {typeof net_change === "number" && ( - 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}> - {net_change > 0 ? "+" : ""}{(net_change * 100).toFixed(1)}% + 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`} + > + {net_change > 0 ? "+" : ""} + {(net_change * 100).toFixed(1)}% )}
@@ -226,7 +265,14 @@ function ValidationResults({ validation }: { validation: Record

{newPassesArr.map((entry, j) => ( - : { value: entry }} /> + ) + : { value: entry } + } + /> ))}
@@ -240,16 +286,21 @@ function ValidationResults({ validation }: { validation: Record

{regressionsArr.map((entry, j) => ( - : { value: entry }} /> + ) + : { value: entry } + } + /> ))}
)} {/* Per-entry results (collapsible if many) */} - {perEntryArr.length > 0 && ( - - )} + {perEntryArr.length > 0 && } {/* Any remaining keys */} {Object.keys(rest).length > 0 && ( @@ -263,18 +314,24 @@ function ValidationResults({ validation }: { validation: Record
)} - ) + ); } function PerEntryResultsSection({ entries }: { entries: unknown[] }) { - const [expanded, setExpanded] = useState(false) + const [expanded, setExpanded] = useState(false); const passCount = entries.filter((e) => { - if (typeof e !== "object" || e === null) return false - const obj = e as Record - return obj.passed === true || obj.matched === true || obj.triggered === true || obj.after === true || obj.result === true - }).length - - const display = expanded ? entries : entries.slice(0, 5) + if (typeof e !== "object" || e === null) return false; + const obj = e as Record; + return ( + obj.passed === true || + obj.matched === true || + obj.triggered === true || + obj.after === true || + obj.result === true + ); + }).length; + + const display = expanded ? entries : entries.slice(0, 5); return (
@@ -303,41 +360,48 @@ function PerEntryResultsSection({ entries }: { entries: unknown[] }) { {display.map((entry, j) => ( : { value: entry }} + entry={ + typeof entry === "object" && entry !== null + ? (entry as Record) + : { value: entry } + } /> ))}
- ) + ); } /** Extract after_pass_rate from an evidence entry's validation data */ function getAfterPassRate(entry: EvidenceEntry): number | null { - if (!entry.validation) return null - const rate = entry.validation.after_pass_rate - return typeof rate === "number" ? rate : null + if (!entry.validation) return null; + const rate = entry.validation.after_pass_rate; + return typeof rate === "number" ? rate : null; } /** Render a delta badge between two pass rates, returns null if not computable */ function DeltaBadge({ prev, curr }: { prev: number | null; curr: number | null }) { - if (prev === null || curr === null) return null - const delta = curr - prev - if (delta === 0) return null - const pct = (delta * 100).toFixed(1) - const positive = delta > 0 + if (prev === null || curr === null) return null; + const delta = curr - prev; + if (delta === 0) return null; + const pct = (delta * 100).toFixed(1); + const positive = delta > 0; return ( - - {positive ? "+" : ""}{pct}% vs previous + + {positive ? "+" : ""} + {pct}% vs previous - ) + ); } function EvalSetSection({ evalSet }: { evalSet: Array> }) { - const [expanded, setExpanded] = useState(false) + const [expanded, setExpanded] = useState(false); const passCount = evalSet.filter((e) => { - const passed = e.passed ?? e.result - return passed === true - }).length + const passed = e.passed ?? e.result; + return passed === true; + }).length; return (
@@ -346,9 +410,11 @@ function EvalSetSection({ evalSet }: { evalSet: Array> } onClick={() => setExpanded(!expanded)} className="flex items-center gap-1.5 w-full text-left" > - {expanded - ? - : } + {expanded ? ( + + ) : ( + + )} Eval Set ({passCount}/{evalSet.length} passed) @@ -357,34 +423,41 @@ function EvalSetSection({ evalSet }: { evalSet: Array> } {expanded && (
{evalSet.map((evalEntry, j) => { - const query = evalEntry.query ?? evalEntry.prompt ?? evalEntry.input - const expected = evalEntry.expected ?? evalEntry.should_trigger - const passed = evalEntry.passed ?? evalEntry.result + const query = evalEntry.query ?? evalEntry.prompt ?? evalEntry.input; + const expected = evalEntry.expected ?? evalEntry.should_trigger; + const passed = evalEntry.passed ?? evalEntry.result; return ( -
+
{typeof passed === "boolean" ? ( - passed - ? - : + passed ? ( + + ) : ( + + ) ) : ( )} - {String(query ?? JSON.stringify(evalEntry))} + + {String(query ?? JSON.stringify(evalEntry))} + {expected !== undefined && ( expect: {String(expected)} )}
- ) + ); })}
)}
- ) + ); } -type RoundStatus = "single" | "intermediate" | "final" +type RoundStatus = "single" | "intermediate" | "final"; /** Render a single evidence card — used for both expanded and collapsed states */ function EvidenceCard({ @@ -394,13 +467,13 @@ function EvidenceCard({ prevPassRate, currPassRate, }: { - entry: EvidenceEntry - roundLabel: string | null - roundStatus: RoundStatus - prevPassRate: number | null - currPassRate: number | null + entry: EvidenceEntry; + roundLabel: string | null; + roundStatus: RoundStatus; + prevPassRate: number | null; + currPassRate: number | null; }) { - const showRound = roundStatus !== "single" + const showRound = roundStatus !== "single"; return ( @@ -412,15 +485,25 @@ function EvidenceCard({ {roundLabel} )} {roundStatus === "final" && ( - Final + + Final + )}
{showRound && } - {entry.stage} + + {entry.stage} + {entry.confidence !== null && ( = 0.8 ? "default" : entry.confidence >= 0.5 ? "secondary" : "destructive"} + variant={ + entry.confidence >= 0.8 + ? "default" + : entry.confidence >= 0.5 + ? "secondary" + : "destructive" + } className="text-[10px] font-mono" > {formatRate(entry.confidence)} confidence @@ -457,9 +540,7 @@ function EvidenceCard({ )} {/* Eval set — test cases used for validation (collapsible) */} - {entry.eval_set && entry.eval_set.length > 0 && ( - - )} + {entry.eval_set && entry.eval_set.length > 0 && } {/* Validation details */} {entry.validation && Object.keys(entry.validation).length > 0 && ( @@ -467,7 +548,7 @@ function EvidenceCard({ )} - ) + ); } /** Collapsed summary for earlier iteration rounds */ @@ -476,12 +557,12 @@ function CollapsedEvidenceCard({ roundLabel, onExpand, }: { - entry: EvidenceEntry - roundLabel: string - onExpand: () => void + entry: EvidenceEntry; + roundLabel: string; + onExpand: () => void; }) { - const passRate = getAfterPassRate(entry) - const improved = entry.validation?.improved + const passRate = getAfterPassRate(entry); + const improved = entry.validation?.improved; return (
- ) + ); } export function EvidenceViewer({ proposalId, evolution, evidence }: Props) { const steps = useMemo( - () => evolution - .filter((e) => e.proposal_id === proposalId) - .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()), + () => + evolution + .filter((e) => e.proposal_id === proposalId) + .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()), [evolution, proposalId], - ) + ); const entries = useMemo( - () => evidence - .filter((e) => e.proposal_id === proposalId) - .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()), + () => + evidence + .filter((e) => e.proposal_id === proposalId) + .sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()), [evidence, proposalId], - ) + ); // Track which earlier rounds are manually expanded - const [expandedRounds, setExpandedRounds] = useState>(new Set()) + const [expandedRounds, setExpandedRounds] = useState>(new Set()); const toggleRound = (key: string) => { setExpandedRounds((prev) => { - const next = new Set(prev) - if (next.has(key)) next.delete(key) - else next.add(key) - return next - }) - } + const next = new Set(prev); + if (next.has(key)) next.delete(key); + else next.add(key); + return next; + }); + }; const snapshot = useMemo(() => { for (let i = steps.length - 1; i >= 0; i--) { - if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record + if (steps[i].eval_snapshot) return steps[i].eval_snapshot as Record; } - return null - }, [steps]) + return null; + }, [steps]); // Separate proposal-stage entries from validation-stage entries, then group validations by target const { proposalEntries, validationsByTarget } = useMemo(() => { - const proposals: EvidenceEntry[] = [] - const validationMap = new Map() + const proposals: EvidenceEntry[] = []; + const validationMap = new Map(); for (const entry of entries) { if (entry.stage !== "validated") { - proposals.push(entry) + 
proposals.push(entry); } else { - const key = entry.target - if (!validationMap.has(key)) validationMap.set(key, []) - validationMap.get(key)!.push(entry) + const key = entry.target; + if (!validationMap.has(key)) validationMap.set(key, []); + validationMap.get(key)!.push(entry); } } - return { proposalEntries: proposals, validationsByTarget: validationMap } - }, [entries]) + return { proposalEntries: proposals, validationsByTarget: validationMap }; + }, [entries]); return (
@@ -567,8 +652,9 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {

- This view shows the complete evidence trail for a skill evolution proposal — how the skill was changed, - the eval test results before and after, and whether the change improved performance. + This view shows the complete evidence trail for a skill evolution proposal — how the + skill was changed, the eval test results before and after, and whether the change improved + performance.

@@ -577,7 +663,9 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) { Proposal Journey - #{proposalId.slice(0, 12)} + + #{proposalId.slice(0, 12)} + @@ -587,10 +675,15 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) { {i > 0 && }
{ACTION_ICON[step.action]} - + {step.action.replace("_", " ")} - {timeAgo(step.timestamp)} + + {timeAgo(step.timestamp)} +
))} @@ -601,21 +694,31 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
{typeof snapshot.net_change === "number" && (
- {(snapshot.net_change as number) > 0 - ? - : } - 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`}> - {(snapshot.net_change as number) > 0 ? "+" : ""}{Math.round((snapshot.net_change as number) * 100)}% + {(snapshot.net_change as number) > 0 ? ( + + ) : ( + + )} + 0 ? "text-emerald-600 dark:text-emerald-400" : "text-red-500"}`} + > + {(snapshot.net_change as number) > 0 ? "+" : ""} + {Math.round((snapshot.net_change as number) * 100)}%
)} - {typeof snapshot.before_pass_rate === "number" && typeof snapshot.after_pass_rate === "number" && ( - - {Math.round((snapshot.before_pass_rate as number) * 100)}% → {Math.round((snapshot.after_pass_rate as number) * 100)}% - - )} + {typeof snapshot.before_pass_rate === "number" && + typeof snapshot.after_pass_rate === "number" && ( + + {Math.round((snapshot.before_pass_rate as number) * 100)}% →{" "} + {Math.round((snapshot.after_pass_rate as number) * 100)}% + + )} {snapshot.improved !== undefined && ( - + {snapshot.improved ? "Improved" : "Regressed"} )} @@ -645,17 +748,23 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) { {/* Validation-stage evidence — grouped by target with iteration rounds */} {Array.from(validationsByTarget.entries()).map(([target, targetEntries]) => { - const hasMultipleRounds = targetEntries.length > 1 + const hasMultipleRounds = targetEntries.length > 1; return (
{targetEntries.map((entry, i) => { - const isLast = i === targetEntries.length - 1 - const roundLabel = hasMultipleRounds ? `Round ${i + 1} of ${targetEntries.length}` : null - const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null - const currPassRate = getAfterPassRate(entry) - const roundKey = `${target}-${entry.timestamp}` - const roundStatus: RoundStatus = !hasMultipleRounds ? "single" : isLast ? "final" : "intermediate" + const isLast = i === targetEntries.length - 1; + const roundLabel = hasMultipleRounds + ? `Round ${i + 1} of ${targetEntries.length}` + : null; + const prevPassRate = i > 0 ? getAfterPassRate(targetEntries[i - 1]) : null; + const currPassRate = getAfterPassRate(entry); + const roundKey = `${target}-${entry.timestamp}`; + const roundStatus: RoundStatus = !hasMultipleRounds + ? "single" + : isLast + ? "final" + : "intermediate"; // Earlier rounds: collapsed by default if (roundStatus === "intermediate" && !expandedRounds.has(roundKey)) { @@ -666,7 +775,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) { roundLabel={roundLabel!} onExpand={() => toggleRound(roundKey)} /> - ) + ); } // Expanded earlier round — show with collapse toggle @@ -689,7 +798,7 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) { currPassRate={currPassRate} />
- ) + ); } // Final round (or single entry) — always expanded @@ -702,10 +811,10 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) { prevPassRate={prevPassRate} currPassRate={currPassRate} /> - ) + ); })}
- ) + ); })} {entries.length === 0 && ( @@ -714,5 +823,5 @@ export function EvidenceViewer({ proposalId, evolution, evidence }: Props) {
)} - ) + ); } diff --git a/packages/ui/src/components/EvolutionTimeline.tsx b/packages/ui/src/components/EvolutionTimeline.tsx index e7c84de0..d7f883a3 100644 --- a/packages/ui/src/components/EvolutionTimeline.tsx +++ b/packages/ui/src/components/EvolutionTimeline.tsx @@ -1,8 +1,3 @@ -import { useState } from "react" -import { Badge } from "../primitives/badge" -import { cn } from "../lib/utils" -import type { EvalSnapshot, EvolutionEntry } from "../types" -import { timeAgo } from "../lib/format" import { CircleDotIcon, RocketIcon, @@ -13,7 +8,13 @@ import { TrendingDownIcon, ChevronDownIcon, ChevronRightIcon, -} from "lucide-react" +} from "lucide-react"; +import { useState } from "react"; + +import { timeAgo } from "../lib/format"; +import { cn } from "../lib/utils"; +import { Badge } from "../primitives/badge"; +import type { EvalSnapshot, EvolutionEntry } from "../types"; const ACTION_ICON: Record = { created: , @@ -21,7 +22,7 @@ const ACTION_ICON: Record = { deployed: , rejected: , rolled_back: , -} +}; const ACTION_COLOR: Record = { created: "bg-blue-500", @@ -29,7 +30,7 @@ const ACTION_COLOR: Record = { deployed: "bg-emerald-500", rejected: "bg-red-500", rolled_back: "bg-red-400", -} +}; const ACTION_RING: Record = { created: "ring-blue-500/30", @@ -37,7 +38,7 @@ const ACTION_RING: Record = { deployed: "ring-emerald-500/30", rejected: "ring-red-500/30", rolled_back: "ring-red-400/30", -} +}; const ACTION_LINE: Record = { created: "bg-blue-500/30", @@ -45,58 +46,65 @@ const ACTION_LINE: Record = { deployed: "bg-emerald-500/30", rejected: "bg-red-500/30", rolled_back: "bg-red-400/30", -} +}; interface Props { - entries: EvolutionEntry[] - selectedProposalId: string | null - onSelect: (proposalId: string) => void + entries: EvolutionEntry[]; + selectedProposalId: string | null; + onSelect: (proposalId: string) => void; } /** Group evolution entries by proposal_id, ordered newest-first. 
*/ function groupByProposal(entries: EvolutionEntry[]) { - const map = new Map() + const map = new Map(); for (const e of entries) { - const group = map.get(e.proposal_id) ?? [] - group.push(e) - map.set(e.proposal_id, group) + const group = map.get(e.proposal_id) ?? []; + group.push(e); + map.set(e.proposal_id, group); } for (const group of map.values()) { - group.sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()) + group.sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()); } return Array.from(map.entries()).sort((a, b) => { - const aLast = a[1][a[1].length - 1] - const bLast = b[1][b[1].length - 1] - return new Date(bLast.timestamp).getTime() - new Date(aLast.timestamp).getTime() - }) + const aLast = a[1][a[1].length - 1]; + const bLast = b[1][b[1].length - 1]; + return new Date(bLast.timestamp).getTime() - new Date(aLast.timestamp).getTime(); + }); } function terminalAction(entries: EvolutionEntry[]): string { - return entries[entries.length - 1].action + return entries[entries.length - 1].action; } /** Find the best eval_snapshot across all steps in a proposal group */ function findEvalSnapshot(steps: EvolutionEntry[]): EvalSnapshot | null { for (let i = steps.length - 1; i >= 0; i--) { - if (steps[i].eval_snapshot) return steps[i].eval_snapshot! + if (steps[i].eval_snapshot) return steps[i].eval_snapshot!; } - return null + return null; } function PassRateDelta({ snapshot }: { snapshot: EvalSnapshot }) { - const net = snapshot.net_change - if (net === undefined || net === null) return null - const pct = Math.round(net * 100) - const isPositive = pct > 0 + const net = snapshot.net_change; + if (net === undefined || net === null) return null; + const pct = Math.round(net * 100); + const isPositive = pct > 0; return ( - - {isPositive ? : } - {isPositive ? "+" : ""}{pct}% + + {isPositive ? ( + + ) : ( + + )} + {isPositive ? 
"+" : ""} + {pct}% - ) + ); } const LIFECYCLE_STEPS = [ @@ -105,10 +113,10 @@ const LIFECYCLE_STEPS = [ { action: "deployed", label: "Deployed", desc: "Accepted and applied to skill file" }, { action: "rejected", label: "Rejected", desc: "Failed validation criteria" }, { action: "rolled_back", label: "Rolled Back", desc: "Reverted after deployment" }, -] +]; function LifecycleLegend() { - const [open, setOpen] = useState(false) + const [open, setOpen] = useState(false); return (
@@ -124,10 +132,7 @@ function LifecycleLegend() {
{LIFECYCLE_STEPS.map((step) => (
-
+
{step.label}

{step.desc}

@@ -137,18 +142,18 @@ function LifecycleLegend() {
)}
- ) + ); } export function EvolutionTimeline({ entries, selectedProposalId, onSelect }: Props) { - const groups = groupByProposal(entries) + const groups = groupByProposal(entries); if (groups.length === 0) { return (

No evolution history yet

- ) + ); } return ( @@ -159,29 +164,29 @@ export function EvolutionTimeline({ entries, selectedProposalId, onSelect }: Pro
- ) + ); } diff --git a/packages/ui/src/components/InfoTip.tsx b/packages/ui/src/components/InfoTip.tsx index 15667795..0697da62 100644 --- a/packages/ui/src/components/InfoTip.tsx +++ b/packages/ui/src/components/InfoTip.tsx @@ -1,5 +1,6 @@ -import { Tooltip, TooltipContent, TooltipTrigger } from "../primitives/tooltip" -import { InfoIcon } from "lucide-react" +import { InfoIcon } from "lucide-react"; + +import { Tooltip, TooltipContent, TooltipTrigger } from "../primitives/tooltip"; /** Small info icon that shows a tooltip on hover. Used to explain metrics and concepts. */ export function InfoTip({ text }: { text: string }) { @@ -15,5 +16,5 @@ export function InfoTip({ text }: { text: string }) { {text} - ) + ); } diff --git a/packages/ui/src/components/OrchestrateRunsPanel.tsx b/packages/ui/src/components/OrchestrateRunsPanel.tsx index 98b817ab..1db7b227 100644 --- a/packages/ui/src/components/OrchestrateRunsPanel.tsx +++ b/packages/ui/src/components/OrchestrateRunsPanel.tsx @@ -1,33 +1,17 @@ -import { useState } from "react" -import { Badge } from "../primitives/badge" -import { - Card, - CardContent, - CardDescription, - CardHeader, - CardTitle, -} from "../primitives/card" -import { - Collapsible, - CollapsibleContent, - CollapsibleTrigger, -} from "../primitives/collapsible" -import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "../types" -import { timeAgo } from "../lib/format" -import { - BotIcon, - CheckCircleIcon, - ChevronRightIcon, - EyeIcon, - SkipForwardIcon, - ZapIcon, -} from "lucide-react" +import { BotIcon, ChevronRightIcon, EyeIcon, SkipForwardIcon, ZapIcon } from "lucide-react"; +import { useState } from "react"; + +import { timeAgo } from "../lib/format"; +import { Badge } from "../primitives/badge"; +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "../primitives/card"; +import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "../primitives/collapsible"; +import type { OrchestrateRunReport, 
OrchestrateRunSkillAction } from "../types"; const ACTION_ICON: Record = { evolve: , watch: , skip: , -} +}; function SkillActionRow({ action }: { action: OrchestrateRunSkillAction }) { return ( @@ -38,18 +22,25 @@ function SkillActionRow({ action }: { action: OrchestrateRunSkillAction }) { {action.skill} - {action.rolledBack ? "rolled back" - : action.action === "evolve" && action.deployed ? "deployed" - : action.action === "evolve" ? "evolved" - : action.action} + {action.rolledBack + ? "rolled back" + : action.action === "evolve" && action.deployed + ? "deployed" + : action.action === "evolve" + ? "evolved" + : action.action} {action.alert && ( @@ -60,42 +51,56 @@ function SkillActionRow({ action }: { action: OrchestrateRunSkillAction }) {

{action.reason}

- ) + ); } function RunCard({ run }: { run: OrchestrateRunReport }) { - const [open, setOpen] = useState(false) - const nonSkipActions = run.skill_actions.filter((a) => a.action !== "skip") - const skipActions = run.skill_actions.filter((a) => a.action === "skip") + const [open, setOpen] = useState(false); + const nonSkipActions = run.skill_actions.filter((a) => a.action !== "skip"); + const skipActions = run.skill_actions.filter((a) => a.action === "skip"); return (
-
0 ? "bg-emerald-500" - : run.evolved > 0 ? "bg-amber-400" - : "bg-muted-foreground/40" - }`} /> +
0 + ? "bg-emerald-500" + : run.evolved > 0 + ? "bg-amber-400" + : "bg-muted-foreground/40" + }`} + />
- {timeAgo(run.timestamp)} + + {timeAgo(run.timestamp)} + {run.dry_run && ( - dry-run + + dry-run + )} {run.approval_mode === "review" && ( - review + + review + )}
- {run.deployed > 0 && {run.deployed} deployed} + {run.deployed > 0 && ( + {run.deployed} deployed + )} {run.evolved > 0 && {run.evolved} evolved} {run.watched > 0 && {run.watched} watched} {run.skipped > 0 && {run.skipped} skipped} {(run.elapsed_ms / 1000).toFixed(1)}s
- +
@@ -118,7 +123,7 @@ function RunCard({ run }: { run: OrchestrateRunReport }) {
- ) + ); } export function OrchestrateRunsPanel({ runs }: { runs: OrchestrateRunReport[] }) { @@ -133,14 +138,16 @@ export function OrchestrateRunsPanel({ runs }: { runs: OrchestrateRunReport[] })

- No orchestrate runs yet. Run selftune orchestrate to start. + No orchestrate runs yet. Run{" "} + selftune orchestrate to + start.

- ) + ); } - const totalDeployed = runs.reduce((sum, r) => sum + r.deployed, 0) + const totalDeployed = runs.reduce((sum, r) => sum + r.deployed, 0); return ( @@ -160,5 +167,5 @@ export function OrchestrateRunsPanel({ runs }: { runs: OrchestrateRunReport[] }) ))} - ) + ); } diff --git a/packages/ui/src/components/section-cards.tsx b/packages/ui/src/components/section-cards.tsx index a34e60be..1746495d 100644 --- a/packages/ui/src/components/section-cards.tsx +++ b/packages/ui/src/components/section-cards.tsx @@ -1,12 +1,3 @@ -import { Badge } from "../primitives/badge" -import { - Card, - CardAction, - CardDescription, - CardHeader, - CardTitle, -} from "../primitives/card" -import { InfoTip } from "./InfoTip" import { TrendingUpIcon, TrendingDownIcon, @@ -16,16 +7,20 @@ import { FlaskConicalIcon, LayersIcon, SearchXIcon, -} from "lucide-react" +} from "lucide-react"; + +import { Badge } from "../primitives/badge"; +import { Card, CardAction, CardDescription, CardHeader, CardTitle } from "../primitives/card"; +import { InfoTip } from "./InfoTip"; interface SectionCardsProps { - skillsCount: number - avgPassRate: number | null - unmatchedCount: number - sessionsCount: number - pendingCount: number - evidenceCount: number - hasEvolution?: boolean + skillsCount: number; + avgPassRate: number | null; + unmatchedCount: number; + sessionsCount: number; + pendingCount: number; + evidenceCount: number; + hasEvolution?: boolean; } export function SectionCards({ @@ -37,8 +32,8 @@ export function SectionCards({ evidenceCount, hasEvolution = true, }: SectionCardsProps) { - const passRateStr = avgPassRate !== null ? `${Math.round(avgPassRate * 100)}%` : "--" - const passRateGood = avgPassRate !== null && avgPassRate >= 0.7 + const passRateStr = avgPassRate !== null ? `${Math.round(avgPassRate * 100)}%` : "--"; + const passRateGood = avgPassRate !== null && avgPassRate >= 0.7; return (
@@ -68,7 +63,9 @@ export function SectionCards({ Avg Trigger Rate - + {passRateStr} @@ -140,9 +137,7 @@ export function SectionCards({ no evolution runs yet ) : pendingCount > 0 ? ( - - awaiting review - + awaiting review ) : null} @@ -168,5 +163,5 @@ export function SectionCards({
- ) + ); } diff --git a/packages/ui/src/components/skill-health-grid.tsx b/packages/ui/src/components/skill-health-grid.tsx index 213bb4c2..ce94b720 100644 --- a/packages/ui/src/components/skill-health-grid.tsx +++ b/packages/ui/src/components/skill-health-grid.tsx @@ -1,4 +1,3 @@ -import * as React from "react" import { closestCenter, DndContext, @@ -9,16 +8,16 @@ import { useSensors, type DragEndEvent, type UniqueIdentifier, -} from "@dnd-kit/core" -import { restrictToVerticalAxis } from "@dnd-kit/modifiers" +} from "@dnd-kit/core"; +import { restrictToVerticalAxis } from "@dnd-kit/modifiers"; import { arrayMove, SortableContext, sortableKeyboardCoordinates, useSortable, verticalListSortingStrategy, -} from "@dnd-kit/sortable" -import { CSS } from "@dnd-kit/utilities" +} from "@dnd-kit/sortable"; +import { CSS } from "@dnd-kit/utilities"; import { flexRender, getCoreRowModel, @@ -33,45 +32,7 @@ import { type Row, type SortingState, type VisibilityState, -} from "@tanstack/react-table" - -import { Badge } from "../primitives/badge" -import { Button } from "../primitives/button" -import { Checkbox } from "../primitives/checkbox" -import { - DropdownMenu, - DropdownMenuCheckboxItem, - DropdownMenuContent, - DropdownMenuRadioGroup, - DropdownMenuRadioItem, - DropdownMenuTrigger, -} from "../primitives/dropdown-menu" -import { Label } from "../primitives/label" -import { - Select, - SelectContent, - SelectGroup, - SelectItem, - SelectTrigger, - SelectValue, -} from "../primitives/select" -import { - Table, - TableBody, - TableCell, - TableHead, - TableHeader, - TableRow, -} from "../primitives/table" -import { - Tabs, - TabsContent, - TabsList, - TabsTrigger, -} from "../primitives/tabs" -import { STATUS_CONFIG } from "../lib/constants" -import type { SkillCard, SkillHealthStatus } from "../types" -import { formatRate, timeAgo } from "../lib/format" +} from "@tanstack/react-table"; import { GripVerticalIcon, Columns3Icon, @@ -88,17 +49,47 @@ import { XCircleIcon, 
CircleDotIcon, HelpCircleIcon, -} from "lucide-react" +} from "lucide-react"; +import * as React from "react"; + +import { STATUS_CONFIG } from "../lib/constants"; +import { formatRate, timeAgo } from "../lib/format"; +import { Badge } from "../primitives/badge"; +import { Button } from "../primitives/button"; +import { Checkbox } from "../primitives/checkbox"; +import { + DropdownMenu, + DropdownMenuCheckboxItem, + DropdownMenuContent, + DropdownMenuRadioGroup, + DropdownMenuRadioItem, + DropdownMenuTrigger, +} from "../primitives/dropdown-menu"; +import { Label } from "../primitives/label"; +import { + Select, + SelectContent, + SelectGroup, + SelectItem, + SelectTrigger, + SelectValue, +} from "../primitives/select"; +import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "../primitives/table"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "../primitives/tabs"; +import type { SkillCard, SkillHealthStatus } from "../types"; // ---------- Drag handle ---------- -type SortableContextValue = Pick, "attributes" | "listeners" | "setActivatorNodeRef"> +type SortableContextValue = Pick< + ReturnType, + "attributes" | "listeners" | "setActivatorNodeRef" +>; -const SortableRowContext = React.createContext(null) +const SortableRowContext = React.createContext(null); function DragHandle() { - const ctx = React.useContext(SortableRowContext) - if (!ctx) return null + const ctx = React.useContext(SortableRowContext); + if (!ctx) return null; return ( - ) + ); } // ---------- Column definitions ---------- -function createColumns(renderSkillName?: (skill: SkillCard) => React.ReactNode): ColumnDef[] { +function createColumns( + renderSkillName?: (skill: SkillCard) => React.ReactNode, +): ColumnDef[] { return [ { id: "drag", @@ -129,10 +122,7 @@ function createColumns(renderSkillName?: (skill: SkillCard) => React.ReactNode):
table.toggleAllPageRowsSelected(!!value)} aria-label="Select all" /> @@ -153,77 +143,82 @@ function createColumns(renderSkillName?: (skill: SkillCard) => React.ReactNode): { accessorKey: "name", header: "Skill", - cell: ({ row }) => renderSkillName - ? renderSkillName(row.original) - : {row.original.name}, + cell: ({ row }) => + renderSkillName ? ( + renderSkillName(row.original) + ) : ( + {row.original.name} + ), enableHiding: false, }, { accessorKey: "scope", header: "Scope", cell: ({ row }) => { - const scope = row.original.scope - if (!scope) return -- + const scope = row.original.scope; + if (!scope) return --; return ( {scope} - ) + ); }, }, { accessorKey: "status", header: "Status", cell: ({ row }) => { - const config = STATUS_CONFIG[row.original.status] + const config = STATUS_CONFIG[row.original.status]; return ( {config.icon} {config.label} - ) + ); }, sortingFn: (rowA, rowB) => { const order: Record = { - CRITICAL: 0, WARNING: 1, UNGRADED: 2, UNKNOWN: 3, HEALTHY: 4, - } - return order[rowA.original.status] - order[rowB.original.status] + CRITICAL: 0, + WARNING: 1, + UNGRADED: 2, + UNKNOWN: 3, + HEALTHY: 4, + }; + return order[rowA.original.status] - order[rowB.original.status]; }, }, { accessorKey: "passRate", header: () =>
Pass Rate
, cell: ({ row }) => { - const rate = row.original.passRate - const isLow = rate !== null && rate < 0.5 + const rate = row.original.passRate; + const isLow = rate !== null && rate < 0.5; return ( -
+
{formatRate(rate)}
- ) + ); }, sortingFn: (rowA, rowB) => { - const a = rowA.original.passRate ?? -1 - const b = rowB.original.passRate ?? -1 - return a - b + const a = rowA.original.passRate ?? -1; + const b = rowB.original.passRate ?? -1; + return a - b; }, }, { accessorKey: "checks", header: () =>
Checks
, cell: ({ row }) => ( -
- {row.original.checks} -
+
{row.original.checks}
), }, { accessorKey: "uniqueSessions", header: () =>
Sessions
, cell: ({ row }) => ( -
- {row.original.uniqueSessions} -
+
{row.original.uniqueSessions}
), }, { @@ -243,13 +238,13 @@ function createColumns(renderSkillName?: (skill: SkillCard) => React.ReactNode): ), sortingFn: (rowA, rowB) => { const toEpoch = (v: string | null) => { - if (!v) return 0 - const t = new Date(v).getTime() - return Number.isNaN(t) ? 0 : t - } - const a = toEpoch(rowA.original.lastSeen) - const b = toEpoch(rowB.original.lastSeen) - return a - b + if (!v) return 0; + const t = new Date(v).getTime(); + return Number.isNaN(t) ? 0 : t; + }; + const a = toEpoch(rowA.original.lastSeen); + const b = toEpoch(rowB.original.lastSeen); + return a - b; }, }, { @@ -264,19 +259,27 @@ function createColumns(renderSkillName?: (skill: SkillCard) => React.ReactNode): ), }, - ] + ]; } // ---------- Draggable row ---------- function DraggableRow({ row }: { row: Row }) { - const { transform, transition, setNodeRef, setActivatorNodeRef, isDragging, attributes, listeners } = useSortable({ + const { + transform, + transition, + setNodeRef, + setActivatorNodeRef, + isDragging, + attributes, + listeners, + } = useSortable({ id: row.original.name, - }) + }); const sortableCtx = React.useMemo( () => ({ attributes, listeners, setActivatorNodeRef }), [attributes, listeners, setActivatorNodeRef], - ) + ); return ( }) { ))} - ) + ); } // ---------- Main component ---------- @@ -308,59 +311,64 @@ export function SkillHealthGrid({ onStatusFilterChange, renderSkillName, }: { - cards: SkillCard[] - totalCount: number - statusFilter?: SkillHealthStatus | "ALL" - onStatusFilterChange?: (v: SkillHealthStatus | "ALL") => void - renderSkillName?: (skill: SkillCard) => React.ReactNode + cards: SkillCard[]; + totalCount: number; + statusFilter?: SkillHealthStatus | "ALL"; + onStatusFilterChange?: (v: SkillHealthStatus | "ALL") => void; + renderSkillName?: (skill: SkillCard) => React.ReactNode; }) { - const [activeView, setActiveView] = React.useState("all") - const [data, setData] = React.useState([]) - const [rowSelection, setRowSelection] = React.useState({}) - const 
[columnVisibility, setColumnVisibility] = React.useState({}) - const [columnFilters, setColumnFilters] = React.useState([]) - const [sorting, setSorting] = React.useState([]) + const [activeView, setActiveView] = React.useState("all"); + const [data, setData] = React.useState([]); + const [rowSelection, setRowSelection] = React.useState({}); + const [columnVisibility, setColumnVisibility] = React.useState({}); + const [columnFilters, setColumnFilters] = React.useState([]); + const [sorting, setSorting] = React.useState([]); const [pagination, setPagination] = React.useState({ pageIndex: 0, pageSize: 20, - }) + }); - const columns = React.useMemo(() => createColumns(renderSkillName), [renderSkillName]) + const columns = React.useMemo(() => createColumns(renderSkillName), [renderSkillName]); // View counts for tab badges - const viewCounts = React.useMemo(() => ({ - all: cards.length, - attention: cards.filter((c) => c.status === "CRITICAL" || c.status === "WARNING").length, - recent: cards.filter((c) => c.lastSeen !== null).length, - ungraded: cards.filter((c) => c.status === "UNGRADED" || c.status === "UNKNOWN").length, - }), [cards]) + const viewCounts = React.useMemo( + () => ({ + all: cards.length, + attention: cards.filter((c) => c.status === "CRITICAL" || c.status === "WARNING").length, + recent: cards.filter((c) => c.lastSeen !== null).length, + ungraded: cards.filter((c) => c.status === "UNGRADED" || c.status === "UNKNOWN").length, + }), + [cards], + ); // Filter cards based on active view tab, then sync into local state for DnD React.useEffect(() => { - let filtered = cards + let filtered = cards; if (activeView === "attention") { - filtered = cards.filter((c) => c.status === "CRITICAL" || c.status === "WARNING") + filtered = cards.filter((c) => c.status === "CRITICAL" || c.status === "WARNING"); } else if (activeView === "recent") { - filtered = [...cards.filter((c) => c.lastSeen !== null)].sort((a, b) => { - const aTime = a.lastSeen ? 
new Date(a.lastSeen).getTime() : 0 - const bTime = b.lastSeen ? new Date(b.lastSeen).getTime() : 0 - return bTime - aTime - }) + filtered = cards + .filter((c) => c.lastSeen !== null) + .toSorted((a, b) => { + const aTime = a.lastSeen ? new Date(a.lastSeen).getTime() : 0; + const bTime = b.lastSeen ? new Date(b.lastSeen).getTime() : 0; + return bTime - aTime; + }); } else if (activeView === "ungraded") { - filtered = cards.filter((c) => c.status === "UNGRADED" || c.status === "UNKNOWN") + filtered = cards.filter((c) => c.status === "UNGRADED" || c.status === "UNKNOWN"); } - setData(filtered) - setPagination((prev) => ({ ...prev, pageIndex: 0 })) - }, [cards, activeView]) + setData(filtered); + setPagination((prev) => ({ ...prev, pageIndex: 0 })); + }, [cards, activeView]); - const sortableId = React.useId() + const sortableId = React.useId(); const sensors = useSensors( useSensor(MouseSensor, {}), useSensor(TouchSensor, {}), useSensor(KeyboardSensor, { coordinateGetter: sortableKeyboardCoordinates, - }) - ) + }), + ); const table = useReactTable({ data, @@ -385,26 +393,26 @@ export function SkillHealthGrid({ getSortedRowModel: getSortedRowModel(), getFacetedRowModel: getFacetedRowModel(), getFacetedUniqueValues: getFacetedUniqueValues(), - }) + }); const dataIds = React.useMemo( () => table.getRowModel().rows.map((r) => r.id), - [table.getRowModel().rows] - ) + [table.getRowModel().rows], + ); - const isSorted = sorting.length > 0 + const isSorted = sorting.length > 0; function handleDragEnd(event: DragEndEvent) { - if (isSorted) return - const { active, over } = event + if (isSorted) return; + const { active, over } = event; if (active && over && active.id !== over.id) { setData((prev) => { - const ids = prev.map((d) => d.name) - const oldIndex = ids.indexOf(active.id as string) - const newIndex = ids.indexOf(over.id as string) - if (oldIndex === -1 || newIndex === -1) return prev - return arrayMove(prev, oldIndex, newIndex) - }) + const ids = prev.map((d) => 
d.name); + const oldIndex = ids.indexOf(active.id as string); + const newIndex = ids.indexOf(over.id as string); + if (oldIndex === -1 || newIndex === -1) return prev; + return arrayMove(prev, oldIndex, newIndex); + }); } } @@ -419,15 +427,8 @@ export function SkillHealthGrid({ - v && setActiveView(v)}> + @@ -447,21 +448,15 @@ export function SkillHealthGrid({ Needs Attention{" "} - {viewCounts.attention > 0 && ( - {viewCounts.attention} - )} + {viewCounts.attention > 0 && {viewCounts.attention}} Recently Active{" "} - {viewCounts.recent > 0 && ( - {viewCounts.recent} - )} + {viewCounts.recent > 0 && {viewCounts.recent}} Ungraded{" "} - {viewCounts.ungraded > 0 && ( - {viewCounts.ungraded} - )} + {viewCounts.ungraded > 0 && {viewCounts.ungraded}} @@ -470,7 +465,9 @@ export function SkillHealthGrid({ }> - {statusFilter && statusFilter !== "ALL" ? statusFilter.charAt(0) + statusFilter.slice(1).toLowerCase() : "Status"} + {statusFilter && statusFilter !== "ALL" + ? statusFilter.charAt(0) + statusFilter.slice(1).toLowerCase() + : "Status"} @@ -478,14 +475,40 @@ export function SkillHealthGrid({ value={statusFilter ?? 
"ALL"} onValueChange={(v) => onStatusFilterChange(v as SkillHealthStatus | "ALL")} > - {([ - { label: "All", value: "ALL" as const, icon: }, - { label: "Healthy", value: "HEALTHY" as const, icon: }, - { label: "Warning", value: "WARNING" as const, icon: }, - { label: "Critical", value: "CRITICAL" as const, icon: }, - { label: "Ungraded", value: "UNGRADED" as const, icon: }, - { label: "Unknown", value: "UNKNOWN" as const, icon: }, - ] as const).map((f) => ( + {( + [ + { + label: "All", + value: "ALL" as const, + icon: , + }, + { + label: "Healthy", + value: "HEALTHY" as const, + icon: , + }, + { + label: "Warning", + value: "WARNING" as const, + icon: , + }, + { + label: "Critical", + value: "CRITICAL" as const, + icon: , + }, + { + label: "Ungraded", + value: "UNGRADED" as const, + icon: , + }, + { + label: "Unknown", + value: "UNKNOWN" as const, + icon: , + }, + ] as const + ).map((f) => ( {f.icon} @@ -506,26 +529,25 @@ export function SkillHealthGrid({ {table .getAllColumns() - .filter( - (column) => - typeof column.accessorFn !== "undefined" && - column.getCanHide() - ) + .filter((column) => typeof column.accessorFn !== "undefined" && column.getCanHide()) .map((column) => ( - column.toggleVisibility(!!value) - } + onCheckedChange={(value) => column.toggleVisibility(!!value)} > - {column.id === "scope" ? "Scope" - : column.id === "passRate" ? "Pass Rate" - : column.id === "uniqueSessions" ? "Sessions" - : column.id === "lastSeen" ? "Last Seen" - : column.id === "hasEvidence" ? "Evidence" - : column.id} + {column.id === "scope" + ? "Scope" + : column.id === "passRate" + ? "Pass Rate" + : column.id === "uniqueSessions" + ? "Sessions" + : column.id === "lastSeen" + ? "Last Seen" + : column.id === "hasEvidence" + ? "Evidence" + : column.id} ))} @@ -533,7 +555,10 @@ export function SkillHealthGrid({
- +
{header.isPlaceholder ? null - : flexRender( - header.column.columnDef.header, - header.getContext() - )} - {header.column.getIsSorted() === "asc" ? " ↑" - : header.column.getIsSorted() === "desc" ? " ↓" - : null} + : flexRender(header.column.columnDef.header, header.getContext())} + {header.column.getIsSorted() === "asc" + ? " ↑" + : header.column.getIsSorted() === "desc" + ? " ↓" + : null}
))} @@ -571,10 +595,7 @@ export function SkillHealthGrid({ {table.getRowModel().rows?.length ? ( - + {table.getRowModel().rows.map((row) => ( ))} @@ -610,7 +631,7 @@ export function SkillHealthGrid({