From 935b5eeba767e4b644cb7e5670661140ba9b36f7 Mon Sep 17 00:00:00 2001 From: jaelgeng Date: Fri, 26 Jun 2026 11:36:25 +0800 Subject: [PATCH 1/4] vendor(wiki-engine): copy team-wiki deterministic extraction modules Vendored from team-wiki by @lurkacai (git.woa.com/lurkacai/team-wiki). Import paths adjusted for teamai-cli project structure. Files copied (all pure deterministic, no AI dependency): - core/graph-index.schema.ts: graph node/edge types, merge, save/load - core/wiki-protocol.ts: wiki category/confidence types, slugify - code-knowledge/code-collector.ts: file collection with git-aware filtering - code-knowledge/code-extractors.ts: multi-language fact extraction dispatch - code-knowledge/code-graph.ts: build CodeGraphIndex from facts - code-knowledge/code-incremental.ts: detect changed files via manifest - code-knowledge/extractors/*: TS/Python/Go/Java/Rust/Config extractors - interface-scanner.ts: HTTP/MQ/RPC endpoint detection (5 languages) - call-chain-tracer.ts: 4-layer call chain tracing - code-graph-overlay.ts: directory-level architecture nodes - doc-graph-extractor.ts: extract API/config/error nodes from docs - manifest-schema.ts: V2 manifest types (entrypoints, responsibilities) --- src/wiki-engine/call-chain-tracer.ts | 255 +++++++++++ src/wiki-engine/code-graph-overlay.ts | 45 ++ .../code-knowledge/code-collector.ts | 219 +++++++++ .../code-knowledge/code-extractors.ts | 73 +++ src/wiki-engine/code-knowledge/code-graph.ts | 171 +++++++ .../code-knowledge/code-incremental.ts | 45 ++ .../code-knowledge/extractors/config.ts | 64 +++ .../code-knowledge/extractors/go.ts | 130 ++++++ .../code-knowledge/extractors/index.ts | 49 ++ .../code-knowledge/extractors/java.ts | 126 ++++++ .../code-knowledge/extractors/python.ts | 126 ++++++ .../code-knowledge/extractors/rust.ts | 143 ++++++ .../code-knowledge/extractors/typescript.ts | 102 +++++ src/wiki-engine/core/graph-index.schema.ts | 418 ++++++++++++++++++ src/wiki-engine/core/wiki-protocol.ts | 197 
+++++++++ src/wiki-engine/doc-graph-extractor.ts | 231 ++++++++++ src/wiki-engine/interface-scanner.ts | 280 ++++++++++++ src/wiki-engine/manifest-schema.ts | 90 ++++ 18 files changed, 2764 insertions(+) create mode 100644 src/wiki-engine/call-chain-tracer.ts create mode 100644 src/wiki-engine/code-graph-overlay.ts create mode 100644 src/wiki-engine/code-knowledge/code-collector.ts create mode 100644 src/wiki-engine/code-knowledge/code-extractors.ts create mode 100644 src/wiki-engine/code-knowledge/code-graph.ts create mode 100644 src/wiki-engine/code-knowledge/code-incremental.ts create mode 100644 src/wiki-engine/code-knowledge/extractors/config.ts create mode 100644 src/wiki-engine/code-knowledge/extractors/go.ts create mode 100644 src/wiki-engine/code-knowledge/extractors/index.ts create mode 100644 src/wiki-engine/code-knowledge/extractors/java.ts create mode 100644 src/wiki-engine/code-knowledge/extractors/python.ts create mode 100644 src/wiki-engine/code-knowledge/extractors/rust.ts create mode 100644 src/wiki-engine/code-knowledge/extractors/typescript.ts create mode 100644 src/wiki-engine/core/graph-index.schema.ts create mode 100644 src/wiki-engine/core/wiki-protocol.ts create mode 100644 src/wiki-engine/doc-graph-extractor.ts create mode 100644 src/wiki-engine/interface-scanner.ts create mode 100644 src/wiki-engine/manifest-schema.ts diff --git a/src/wiki-engine/call-chain-tracer.ts b/src/wiki-engine/call-chain-tracer.ts new file mode 100644 index 0000000..6d3da50 --- /dev/null +++ b/src/wiki-engine/call-chain-tracer.ts @@ -0,0 +1,255 @@ +import type { CodeCollectedFile } from './code-knowledge/code-collector.js'; +import type { CodeFact } from './code-knowledge/code-extractors.js'; + +export type CallChainLayer = "entry" | "orchestration" | "service" | "data"; + +export interface CallChainStep { + layer: CallChainLayer; + file: string; + lineStart: number; + symbol: string; + callsTo: string[]; // symbols it calls +} + +export interface CallChain { + 
// --- Layer classification heuristics ---

/**
 * Patterns that mark a file path or symbol as an entry point.
 * The `$`-anchored patterns are file-name patterns and only make sense when
 * tested against a path on its own (not against a concatenated string).
 */
const ENTRY_PATTERNS = [
  /handler/i,
  /route/i,
  /controller/i,
  /endpoint/i,
  /main\.(ts|go|py|rs|java)$/,
  /server\.(ts|go|py|rs|java)$/,
  /app\.(ts|go|py|rs|java)$/,
];

/** Patterns that mark a file path or symbol as orchestration-layer code. */
const ORCHESTRATION_PATTERNS = [
  /workflow/i,
  /saga/i,
  /dispatcher/i,
  /orchestrat/i,
  /coordinator/i,
  /pipeline/i,
  /scheduler/i,
  /command/i,
];

/** Patterns that mark a file path or symbol as data-layer code. */
const DATA_PATTERNS = [
  /\bdb\b/i,
  /repository/i,
  /\bdao\b/i,
  /model/i,
  /store/i,
  /database/i,
  /migration/i,
  /schema/i,
  /query/i,
  /entity/i,
];

/**
 * Classify a (file, symbol) pair into a call-chain layer.
 *
 * Precedence is entry, then orchestration, then data; anything that matches
 * none of the pattern tables is classified as "service".
 *
 * The file path and the symbol are tested separately. Testing them as one
 * concatenated string (`"<file> <symbol>"`) made the `$`-anchored file-name
 * patterns such as `server.ts$` unmatchable, because the symbol always
 * followed the path.
 *
 * @param filePath - path of the file that declares the symbol
 * @param symbol - name of the symbol being classified
 * @returns the first layer whose patterns match the path or the symbol
 */
function classifyLayer(filePath: string, symbol: string): CallChainLayer {
  const matchesAny = (patterns: readonly RegExp[]): boolean =>
    patterns.some((pattern) => pattern.test(filePath) || pattern.test(symbol));

  if (matchesAny(ENTRY_PATTERNS)) return "entry";
  if (matchesAny(ORCHESTRATION_PATTERNS)) return "orchestration";
  if (matchesAny(DATA_PATTERNS)) return "data";
  return "service";
}
/**
 * Trace call chains from entry points through the codebase.
 * Simplified version of codebase-mind's 3-layer penetration analysis.
 *
 * 1. Find entry points (handlers, routes, main functions)
 * 2. For each entry point, follow its file's "relation" facts to target files
 * 3. Classify each step by layer (entry -> orchestration -> service -> data)
 * 4. Stop recursing after MAX_DEPTH (4) levels
 *
 * Each step records the line of the symbol it describes: the entry point's
 * own line for the first step, and the target component's line for every
 * step reached through a relation. (Previously every step was stamped with
 * the entry point's line, even when the step lived in a different file.)
 *
 * @param facts - extracted code facts; "relation" and "component" facts drive the walk
 * @param files - collected files, used to resolve relation names to file paths
 * @returns one chain per entry point, sorted by `depth` descending. Note that
 *          `depth` is the number of recorded steps, not the recursion depth.
 */
export function traceCallChains(facts: CodeFact[], files: CodeCollectedFile[]): CallChain[] {
  const MAX_DEPTH = 4;

  // Lookup structures shared by every entry point.
  const relationsByFile = buildRelationsByFile(facts);
  const componentsByFile = buildComponentsByFile(facts);
  const filesByModule = buildFilesByModule(files);

  const chains: CallChain[] = [];

  for (const entry of findEntryPoints(facts, files)) {
    // Visited set is per entry point so chains from different entries may overlap.
    const visited = new Set<string>();
    const steps: CallChainStep[] = [];

    const trace = (file: string, symbol: string, lineStart: number, depth: number): void => {
      if (depth >= MAX_DEPTH) return;

      const key = `${file}:${symbol}`;
      if (visited.has(key)) return;
      visited.add(key);

      const relations = relationsByFile.get(file) ?? [];
      // Resolve every relation once; the result feeds both callsTo and the recursion.
      const resolvedTargets = relations.map((relation) =>
        resolveRelationTarget(relation.name, filesByModule),
      );

      // Everything declared in any file this file relates to counts as "called".
      const callsTo: string[] = [];
      for (const targetFiles of resolvedTargets) {
        for (const targetFile of targetFiles) {
          for (const component of componentsByFile.get(targetFile) ?? []) {
            callsTo.push(component.name);
          }
        }
      }

      steps.push({
        layer: classifyLayer(file, symbol),
        file,
        lineStart,
        symbol,
        callsTo: callsTo.slice(0, 10),
      });

      // Bound the fan-out: first 5 relations, first 2 files each, primary component only.
      for (const targetFiles of resolvedTargets.slice(0, 5)) {
        for (const targetFile of targetFiles.slice(0, 2)) {
          const primary = (componentsByFile.get(targetFile) ?? [])[0];
          if (primary) {
            trace(targetFile, primary.name, primary.lineStart, depth + 1);
          }
        }
      }
    };

    trace(entry.file, entry.symbol, entry.lineStart, 0);

    if (steps.length > 0) {
      chains.push({
        entryPoint: `${entry.symbol} (${entry.file})`,
        steps,
        depth: steps.length,
      });
    }
  }

  // Sort chains by depth (deepest first) for more useful output.
  chains.sort((a, b) => b.depth - a.depth);
  return chains;
}
/**
 * Resolve an import/relation name to the collected files it may refer to.
 *
 * Lookup order, first hit wins:
 *   1. the normalised path itself,
 *   2. the normalised path plus `/index`,
 *   3. the last path segment (basename) only.
 *
 * Normalisation strips every leading `./` and `../` segment and a trailing
 * source-file extension. Stripping `../` as well lets a parent-relative import
 * such as `../core/util.js` hit the exact `core/util` key instead of falling
 * straight through to the ambiguous basename lookup.
 *
 * @param importPath - the raw relation name (import specifier)
 * @param filesByModule - index from path / extension-less path / basename to file paths
 * @returns the matching file paths, or an empty array when nothing matches
 */
function resolveRelationTarget(importPath: string, filesByModule: Map<string, string[]>): string[] {
  const normalized = importPath
    .replace(/^(?:\.\.?\/)+/, "")
    .replace(/\.(ts|tsx|js|jsx|mjs|cjs|py|go|rs|java)$/, "");
  const basename = normalized.split("/").pop() ?? "";

  for (const key of [normalized, `${normalized}/index`, basename]) {
    // An empty key can never identify a module; skip it rather than look it up.
    const match = key ? filesByModule.get(key) : undefined;
    if (match) return match;
  }
  return [];
}
/** Per-language path patterns that mark a file as a "key file" (likely entry point or core module). */
export const KEY_FILE_PATTERNS: Record<string, RegExp[]> = {
  go: [/main\.go$/, /cmd\/.*\.go$/, /handler.*\.go$/, /server\.go$/, /router\.go$/],
  python: [/main\.py$/, /app\.py$/, /server\.py$/, /routes?\.py$/, /models?\.py$/],
  java: [/Application\.java$/, /Controller\.java$/, /Service\.java$/],
  typescript: [/index\.ts$/, /server\.ts$/, /app\.ts$/, /router\.ts$/],
  rust: [/main\.rs$/, /lib\.rs$/, /mod\.rs$/]
};

/**
 * Report whether a file is a key file for its language.
 *
 * @param relativePath - path of the file, tested against the language's patterns
 * @param language - language identifier used as the key into KEY_FILE_PATTERNS
 * @returns true when any pattern registered for `language` matches the path;
 *          false when the language has no registered patterns
 */
export function isKeyFile(relativePath: string, language: string): boolean {
  // Own-property check: a bare lookup would find inherited members such as
  // "constructor" or "toString" and then fail on the missing `.some` method.
  if (!Object.prototype.hasOwnProperty.call(KEY_FILE_PATTERNS, language)) {
    return false;
  }
  const patterns = KEY_FILE_PATTERNS[language];
  return patterns?.some((pattern) => pattern.test(relativePath)) ?? false;
}
/**
 * File extension (lower-case, with leading dot) to language identifier.
 * `.conf` and `.ini` are deliberately grouped under "toml".
 * `.mjs` / `.cjs` are listed so that every JavaScript extension accepted by
 * the collector's code-file filter gets the same language as `.js`.
 */
const LANGUAGE_BY_EXTENSION: Readonly<Record<string, string>> = {
  ".ts": "typescript", ".tsx": "typescript",
  ".js": "javascript", ".jsx": "javascript", ".mjs": "javascript", ".cjs": "javascript",
  ".py": "python", ".go": "go", ".rs": "rust", ".java": "java",
  ".json": "json", ".yaml": "yaml", ".yml": "yaml",
  ".toml": "toml", ".sql": "sql", ".conf": "toml", ".ini": "toml",
};

/**
 * Map a file path to a language identifier based on its extension.
 *
 * @param filePath - path whose extension is inspected (case-insensitively)
 * @returns the language identifier, or "text" for unknown or missing extensions
 */
function languageFor(filePath: string): string {
  const ext = path.extname(filePath).toLowerCase();
  return LANGUAGE_BY_EXTENSION[ext] ?? "text";
}
/**
 * Languages that describe data/config files rather than program code.
 * "toml" is included because the collector also files `.ini` / `.conf` under
 * it; counting it would let a config-heavy repo report "toml" as its primary
 * language, just as json/yaml would if they were not excluded.
 */
const NON_CODE_LANGUAGES: ReadonlySet<string> = new Set(["json", "yaml", "toml", "text"]);

/**
 * Pick the most frequent programming language among the collected files.
 *
 * @param files - collected files; only their `language` field is read
 * @returns the language with the highest file count (the first one seen wins
 *          a tie), or "unknown" when no file has a programming language
 */
function detectPrimaryLanguage(files: CodeCollectedFile[]): string {
  const counts = new Map<string, number>();
  for (const { language } of files) {
    if (NON_CODE_LANGUAGES.has(language)) continue;
    counts.set(language, (counts.get(language) ?? 0) + 1);
  }

  let primary = "unknown";
  let max = 0;
  // Strict `>` keeps the earliest-inserted language on ties.
  for (const [lang, count] of counts) {
    if (count > max) {
      max = count;
      primary = lang;
    }
  }
  return primary;
}
/**
 * Translate a fact kind into the evidence type recorded alongside it.
 *
 * - relation  -> "usage"
 * - data      -> "schema"
 * - config    -> "config"
 * - component, interface, error, style -> "definition"
 */
export function mapKindToEvidenceType(kind: CodeFactKind): CodeEvidenceType {
  switch (kind) {
    case "relation":
      return "usage";
    case "data":
      return "schema";
    case "config":
      return "config";
    case "style":
    case "error":
    case "interface":
    case "component":
      return "definition";
  }
}
/**
 * Drop repeated facts, keeping the first occurrence of each.
 * Two facts are the same when kind, name, file and lineStart all agree;
 * the relative order of the surviving facts is unchanged.
 */
function dedupe(facts: CodeFact[]): CodeFact[] {
  const alreadyEmitted = new Set<string>();
  return facts.filter((candidate) => {
    const identity = `${candidate.kind}:${candidate.name}:${candidate.file}:${candidate.lineStart}`;
    if (alreadyEmitted.has(identity)) {
      return false;
    }
    alreadyEmitted.add(identity);
    return true;
  });
}
/**
 * Heuristic: could the import specifier `importTarget` refer to `file`?
 *
 * The specifier is normalised by stripping every leading `./` / `../` segment
 * and a trailing TS/JS extension; the file matches when its path contains the
 * normalised specifier as a substring.
 *
 * Specifiers that normalise to nothing meaningful (`""`, `"."`, `".."`) never
 * match. Without this guard `String.prototype.includes("")` is true for every
 * file, so a single `./` import produced an edge to every node file.
 *
 * @param importTarget - raw import specifier taken from a relation fact
 * @param file - path of a candidate target file
 * @returns true when the file path contains the normalised specifier
 */
function relationMayTarget(importTarget: string, file: string): boolean {
  const normalized = importTarget
    .replace(/^(?:\.\.?\/)+/u, "")
    .replace(/\.(ts|tsx|js|jsx)$/u, "");
  if (normalized === "" || normalized === "." || normalized === "..") {
    return false;
  }
  return file.includes(normalized);
}
/**
 * Try to match an import/call target to a component slug by substring matching.
 *
 * Both sides are lower-cased and reduced to `[a-z0-9]` before comparison; a
 * component matches when either normalised string contains the other. The
 * first matching component (in array order) wins.
 *
 * A target or slug that normalises to the empty string is never matched:
 * every string contains `""`, so without this guard a target such as `"./"`
 * would resolve to the first component and create a bogus edge.
 *
 * @param target - import or call name to resolve
 * @param components - candidate components; only `slug` is read
 * @returns the slug of the first matching component, or undefined
 */
function findComponentBySlugMatch(
  target: string,
  components: Array<{ slug: string }>
): string | undefined {
  const normalize = (value: string): string => value.toLowerCase().replace(/[^a-z0-9]/g, "");

  const normalized = normalize(target);
  if (normalized === "") return undefined;

  for (const component of components) {
    const slugNorm = normalize(component.slug);
    if (slugNorm === "") continue;
    if (slugNorm.includes(normalized) || normalized.includes(slugNorm)) {
      return component.slug;
    }
  }
  return undefined;
}
/**
 * Map a freeform category string to a wiki category.
 *
 * Matching is case-insensitive. Recognised aliases:
 *   component | module | service   -> "component"
 *   interface | api | type         -> "interface"
 *   config | configuration         -> "config"
 *   rule | validation              -> "rule"
 *   process | workflow             -> "process"
 *   decision                       -> "decision"
 *   mapping                        -> "mapping"
 * Anything else falls back to "component".
 *
 * "mapping" is part of the declared return union but previously had no case,
 * so a component already labelled "mapping" was silently turned into
 * "component".
 *
 * @param category - freeform category label
 * @returns the corresponding wiki category
 */
function mapCategoryToWikiCategory(category: string): "component" | "interface" | "config" | "rule" | "process" | "decision" | "mapping" {
  switch (category.toLowerCase()) {
    case "component":
    case "module":
    case "service":
      return "component";
    case "interface":
    case "api":
    case "type":
      return "interface";
    case "config":
    case "configuration":
      return "config";
    case "rule":
    case "validation":
      return "rule";
    case "process":
    case "workflow":
      return "process";
    case "decision":
      return "decision";
    case "mapping":
      return "mapping";
    default:
      return "component";
  }
}
/**
 * Paths that indicate configuration: "config" anywhere in the path, or one of
 * the config-file extensions the collector gathers. Case-insensitive, matching
 * the error/exception test below.
 */
const CONFIG_PATH_PATTERN = /config|\.(?:json|ya?ml|toml|ini|conf)$/iu;

/** Paths that indicate error-handling code. */
const ERROR_PATH_PATTERN = /error|exception/i;

/**
 * Compute the wiki pages that need regeneration for a set of touched files.
 *
 * The project index page is always included. Every touched file marks the
 * component page; files that look like configuration additionally mark the
 * config page, and files that look like error handling mark the error page.
 *
 * @param project - project name used in the page paths
 * @param files - paths of added, changed and deleted files
 * @returns the affected page paths, de-duplicated and sorted
 */
function affectedPages(project: string, files: string[]): string[] {
  const pages = new Set<string>([`code/${project}/index.md`]);
  for (const file of files) {
    if (CONFIG_PATH_PATTERN.test(file)) {
      pages.add(`code/${project}/config.md`);
    }
    if (ERROR_PATH_PATTERN.test(file)) {
      pages.add(`code/${project}/error.md`);
    }
    pages.add(`code/${project}/component.md`);
  }
  return [...pages].sort();
}
// Optional opening / closing identifier quote: backtick, double quote, single quote, or bracket.
const SQL_QUOTE_OPEN = "[`\"'\\[]?";
const SQL_QUOTE_CLOSE = "[`\"'\\]]?";
// A possibly qualified name (`db.schema.table`); the capture group is the LAST segment.
const SQL_QUALIFIED_NAME =
  `(?:${SQL_QUOTE_OPEN}\\w+${SQL_QUOTE_CLOSE}\\s*\\.\\s*)*${SQL_QUOTE_OPEN}(\\w+)${SQL_QUOTE_CLOSE}`;

const SQL_CREATE_TABLE = new RegExp(
  `CREATE\\s+TABLE\\s+(?:IF\\s+NOT\\s+EXISTS\\s+)?${SQL_QUALIFIED_NAME}`,
  "i",
);
const SQL_ALTER_TABLE = new RegExp(
  `ALTER\\s+TABLE\\s+(?:IF\\s+EXISTS\\s+)?(?:ONLY\\s+)?${SQL_QUALIFIED_NAME}`,
  "i",
);
const SQL_CREATE_INDEX = new RegExp(
  `CREATE\\s+(?:UNIQUE\\s+)?INDEX\\s+(?:CONCURRENTLY\\s+)?(?:IF\\s+NOT\\s+EXISTS\\s+)?${SQL_QUOTE_OPEN}(\\w+)${SQL_QUOTE_CLOSE}`,
  "i",
);

/**
 * Extract facts from SQL files.
 *
 * Emits one "data" fact per line for the first of these that matches:
 *   - CREATE TABLE  -> fact named `<table>`
 *   - ALTER TABLE   -> fact named `alter:<table>`
 *   - CREATE INDEX  -> fact named `index:<index>`
 *
 * Compared with a bare `\w+` capture directly after the keyword, the patterns:
 *   - skip schema/database qualifiers, so `public.users` yields `users`
 *     instead of `public`;
 *   - skip `IF EXISTS` / `ONLY` after ALTER TABLE (`ALTER TABLE ONLY t` no
 *     longer yields `alter:ONLY`);
 *   - skip `CONCURRENTLY` / `IF NOT EXISTS` after CREATE INDEX (no more
 *     `index:IF`).
 *
 * @param files - collected files; `content` and `relativePath` are read
 * @returns the extracted facts in file order, then line order
 */
export function extractSql(files: CodeCollectedFile[]): CodeFact[] {
  const facts: CodeFact[] = [];
  for (const file of files) {
    const lines = file.content.split("\n");
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i].trim();
      const lineNumber = i + 1;

      const createTable = SQL_CREATE_TABLE.exec(line);
      if (createTable) {
        facts.push(makeFact("data", createTable[1], file.relativePath, lineNumber, line));
        continue;
      }

      const alterTable = SQL_ALTER_TABLE.exec(line);
      if (alterTable) {
        facts.push(makeFact("data", `alter:${alterTable[1]}`, file.relativePath, lineNumber, line));
        continue;
      }

      const createIndex = SQL_CREATE_INDEX.exec(line);
      if (createIndex) {
        facts.push(makeFact("data", `index:${createIndex[1]}`, file.relativePath, lineNumber, line));
      }
    }
  }
  return facts;
}
// -0,0 +1,130 @@
import { type CodeCollectedFile } from "../code-collector.js";
import { type CodeFact, type CodeFactKind, mapKindToEvidenceType } from "../code-extractors.js";

/**
 * Go extractor.
 * Extracts structs, funcs, interfaces, HTTP handlers, configs, errors, and import relations.
 *
 * The scan is purely line-based: every pattern is tested against every line, so
 * a single line may yield several facts.
 *
 * @param files Collected files; only `content` and `relativePath` are read.
 * @returns Facts in file order, each carrying a 1-based line number.
 */
export function extractGo(files: CodeCollectedFile[]): CodeFact[] {
  const facts: CodeFact[] = [];

  for (const file of files) {
    const lines = file.content.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      const lineNumber = i + 1;

      // --- Components ---
      const structDecl = /^type\s+([A-Z][A-Za-z0-9_]*)\s+struct\b/u.exec(line);
      if (structDecl) {
        facts.push(makeFact("component", structDecl[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      const funcNew = /^func\s+New([A-Z][A-Za-z0-9_]*)\s*\(/u.exec(line);
      if (funcNew) {
        facts.push(makeFact("component", `New${funcNew[1]}`, file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      const packageDecl = /^package\s+([a-z][a-z0-9_]*)/u.exec(line);
      if (packageDecl) {
        facts.push(makeFact("component", `package:${packageDecl[1]}`, file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // Exported top-level funcs; constructors were already recorded above.
      const topLevelFunc = /^func\s+([A-Z][A-Za-z0-9_]*)\s*\(/u.exec(line);
      if (topLevelFunc && !funcNew) {
        facts.push(makeFact("component", topLevelFunc[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // --- Interfaces ---
      const ifaceDecl = /^type\s+([A-Z][A-Za-z0-9_]*)\s+interface\b/u.exec(line);
      if (ifaceDecl) {
        facts.push(makeFact("interface", ifaceDecl[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // HTTP handler methods: func (h *Handler) ServeHTTP(...)
      // The receiver is matched as `[name] [*]Type[generics]`. A greedy
      // `[^)]*` prefix here would swallow all but the last character of the
      // type name, producing names like `r.ServeHTTP`.
      const handlerMethod =
        /^func\s+\(\s*(?:\w+\s+)?\*?(\w+)(?:\[[^\]]*\])?\s*\)\s+(ServeHTTP|Handle|Handler)\s*\(/u.exec(line);
      if (handlerMethod) {
        facts.push(makeFact("interface", `${handlerMethod[1]}.${handlerMethod[2]}`, file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // Router registrations: r.HandleFunc("/path", handler)
      const routeReg = /\.\s*(?:HandleFunc|Handle|Get|Post|Put|Delete|Patch)\s*\(\s*["'](\/[^"']*)/u.exec(line);
      if (routeReg) {
        facts.push(makeFact("interface", routeReg[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // --- Configs ---
      const envGet = /os\.Getenv\(\s*["']([A-Z][A-Z0-9_]+)["']\s*\)/u.exec(line);
      if (envGet) {
        facts.push(makeFact("config", envGet[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // yaml/toml struct tags (only the first tag on the line is recorded)
      const structTag = /`(?:yaml|toml|json):"([^",]+)"/u.exec(line);
      if (structTag) {
        facts.push(makeFact("config", `tag:${structTag[1]}`, file.relativePath, lineNumber, line, "INFERRED"));
      }

      // --- Errors ---
      const errVar = /^var\s+(Err[A-Z][A-Za-z0-9_]*)\s*=\s*(?:errors\.New|fmt\.Errorf)/u.exec(line);
      if (errVar) {
        facts.push(makeFact("error", errVar[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // ErrXxx entries inside a grouped `const (` / `var (` block.
      const errConst = /^\s*(Err[A-Z][A-Za-z0-9_]*)\s*(?:=|error)/u.exec(line);
      if (errConst && !errVar) {
        const inBlock = isInsideBlock(lines, i, "const", "var");
        if (inBlock) {
          facts.push(makeFact("error", errConst[1], file.relativePath, lineNumber, line, "INFERRED"));
        }
      }

      // Inline fmt.Errorf messages; the first 60 characters of the format string become the name.
      const fmtErrorf = /fmt\.Errorf\s*\(\s*["']([^"']{1,60})/u.exec(line);
      if (fmtErrorf && !errVar) {
        facts.push(makeFact("error", fmtErrorf[1], file.relativePath, lineNumber, line, "INFERRED"));
      }

      // --- Relations ---
      const importPath = /^\s*"([^"]+)"/u.exec(line);
      if (importPath && isInsideBlock(lines, i, "import")) {
        facts.push(makeFact("relation", importPath[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      const singleImport = /^import\s+"([^"]+)"/u.exec(line);
      if (singleImport) {
        facts.push(makeFact("relation", singleImport[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }
    }
  }

  return facts;
}

/**
 * Checks if the current line index is inside a block starting with one of the given keywords.
 *
 * Walks backwards over at most 50 preceding lines. A line consisting solely of
 * `)` means the nearest block above is already closed, so the answer is false.
 * A line starting with `<keyword> (` means the block is still open.
 *
 * @param lines All lines of the file.
 * @param currentIndex Zero-based index of the line being classified.
 * @param keywords Block-opening keywords; interpolated into a RegExp unescaped,
 *   so they must be plain words (callers pass "const", "var", "import").
 */
function isInsideBlock(lines: string[], currentIndex: number, ...keywords: string[]): boolean {
  // Compile one opener pattern per keyword up front instead of once per scanned line.
  const openers = keywords.map((keyword) => new RegExp(`^${keyword}\\s*\\(`, "u"));
  const earliest = Math.max(0, currentIndex - 50);

  for (let j = currentIndex - 1; j >= earliest; j--) {
    const candidate = lines[j];
    if (/^\s*\)\s*$/u.test(candidate)) {
      return false;
    }
    if (openers.some((opener) => opener.test(candidate))) {
      return true;
    }
  }
  return false;
}

/**
 * Builds a CodeFact. `detail` is the trimmed source line and `evidenceType`
 * is derived from `kind` via mapKindToEvidenceType.
 */
function makeFact(
  kind: CodeFactKind,
  name: string,
  file: string,
  lineStart: number,
  rawLine: string,
  confidence: CodeFact["confidence"]
): CodeFact {
  return { kind, name, file, lineStart, detail: rawLine.trim(), confidence, evidenceType: mapKindToEvidenceType(kind) };
}

// diff --git a/src/wiki-engine/code-knowledge/extractors/index.ts b/src/wiki-engine/code-knowledge/extractors/index.ts
// new file mode 100644
// index 0000000..19c2b17
// --- /dev/null
// +++ b/src/wiki-engine/code-knowledge/extractors/index.ts
// @@ -0,0 +1,49 @@
import { type CodeCollectedFile } from "../code-collector.js";
import { type CodeFact } from "../code-extractors.js";
import { extractToml, extractSql } from "./config.js";
import { extractGo } from "./go.js";
import { extractJava } from "./java.js";
import { extractPython } from "./python.js";
import { extractRust } from "./rust.js";
import { extractTypescript } from "./typescript.js";

type LanguageExtractor = (files: CodeCollectedFile[]) => CodeFact[];

/**
 * Registry mapping language identifiers to their specialized extractors.
+ */ +const EXTRACTOR_REGISTRY: Record = { + typescript: extractTypescript, + javascript: extractTypescript, // JS uses the same TS extractor (compatible patterns) + go: extractGo, + python: extractPython, + java: extractJava, + rust: extractRust, + toml: extractToml, + sql: extractSql, +}; + +/** + * Dispatch extraction to the appropriate language-specific extractor. + * Falls back to an empty array for unsupported languages (json, yaml, text, etc.). + */ +export function extractForLanguage(language: string, files: CodeCollectedFile[]): CodeFact[] { + const extractor = EXTRACTOR_REGISTRY[language]; + if (!extractor) { + return []; + } + return extractor(files); +} + +/** + * Returns the list of languages with registered extractors. + */ +export function supportedLanguages(): string[] { + return Object.keys(EXTRACTOR_REGISTRY); +} + +export { extractGo } from "./go.js"; +export { extractJava } from "./java.js"; +export { extractPython } from "./python.js"; +export { extractRust } from "./rust.js"; +export { extractTypescript } from "./typescript.js"; diff --git a/src/wiki-engine/code-knowledge/extractors/java.ts b/src/wiki-engine/code-knowledge/extractors/java.ts new file mode 100644 index 0000000..19f0629 --- /dev/null +++ b/src/wiki-engine/code-knowledge/extractors/java.ts @@ -0,0 +1,126 @@ +import { type CodeCollectedFile } from "../code-collector.js"; +import { type CodeFact, type CodeFactKind, mapKindToEvidenceType } from "../code-extractors.js"; + +/** + * Java extractor. + * Extracts classes, Spring annotations, interfaces, controllers, configs, errors, and imports. 
+ */ +export function extractJava(files: CodeCollectedFile[]): CodeFact[] { + const facts: CodeFact[] = []; + + for (const file of files) { + const lines = file.content.split(/\r?\n/); + let pendingAnnotations: string[] = []; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lineNumber = i + 1; + + // Collect annotations for context on the next declaration + const annotation = /^\s*@([A-Za-z]+)/u.exec(line); + if (annotation) { + pendingAnnotations.push(annotation[1]); + } + + // --- Components --- + const classDecl = /^(?:public|protected|private)?\s*(?:abstract\s+)?(?:final\s+)?class\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (classDecl) { + const isSpringComponent = pendingAnnotations.some((a) => + ["Component", "Service", "Repository", "Configuration", "Bean"].includes(a) + ); + facts.push(makeFact("component", classDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); + + if (isSpringComponent) { + const springType = pendingAnnotations.find((a) => + ["Component", "Service", "Repository", "Configuration"].includes(a) + ); + if (springType) { + facts.push(makeFact("component", `@${springType}:${classDecl[1]}`, file.relativePath, lineNumber, line, "EXTRACTED")); + } + } + } + + // Enum declaration + const enumDecl = /^(?:public|protected|private)?\s*enum\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (enumDecl) { + facts.push(makeFact("component", enumDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // --- Interfaces --- + const ifaceDecl = /^(?:public|protected|private)?\s*interface\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (ifaceDecl) { + facts.push(makeFact("interface", ifaceDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // Controllers and REST endpoints + const isController = pendingAnnotations.some((a) => + ["Controller", "RestController"].includes(a) + ); + if (isController && classDecl) { + facts.push(makeFact("interface", `@Controller:${classDecl[1]}`, file.relativePath, lineNumber, 
line, "EXTRACTED")); + } + + // RequestMapping and method mappings + const requestMapping = /@(?:RequestMapping|GetMapping|PostMapping|PutMapping|DeleteMapping|PatchMapping)\s*\(\s*(?:value\s*=\s*)?["'](\/[^"']*)/u.exec(line); + if (requestMapping) { + facts.push(makeFact("interface", requestMapping[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // --- Configs --- + const valueAnnotation = /@Value\s*\(\s*["']\$\{([^}]+)\}/u.exec(line); + if (valueAnnotation) { + facts.push(makeFact("config", valueAnnotation[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // application.properties/yml style references + const propRef = /["']([a-z][a-z0-9._-]{3,})["']/u.exec(line); + if (propRef && isConfigFile(file.relativePath)) { + facts.push(makeFact("config", propRef[1], file.relativePath, lineNumber, line, "INFERRED")); + } + + // --- Errors --- + const errorEnum = /^(?:public|protected|private)?\s*enum\s+([A-Z][A-Za-z0-9_]*(?:Error|Code|Status))\b/u.exec(line); + if (errorEnum) { + facts.push(makeFact("error", errorEnum[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const throwStmt = /throw\s+new\s+([A-Za-z_$][\w$]*Exception)\s*\(/u.exec(line); + if (throwStmt) { + facts.push(makeFact("error", throwStmt[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const exceptionClass = /^(?:public|protected|private)?\s*class\s+([A-Z][A-Za-z0-9_]*Exception)\b/u.exec(line); + if (exceptionClass) { + facts.push(makeFact("error", exceptionClass[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // --- Relations --- + const importStmt = /^import\s+(?:static\s+)?([a-z][\w.]*\.[A-Z][\w]*)/u.exec(line); + if (importStmt) { + facts.push(makeFact("relation", importStmt[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // Reset annotations if we hit a non-annotation, non-blank line + if (!annotation && line.trim().length > 0) { + pendingAnnotations = []; + } + } + } + + return facts; +} + +function 
isConfigFile(relativePath: string): boolean { + return /(?:application|bootstrap|config)\.(?:properties|ya?ml)$/iu.test(relativePath); +} + +function makeFact( + kind: CodeFactKind, + name: string, + file: string, + lineStart: number, + rawLine: string, + confidence: CodeFact["confidence"] +): CodeFact { + return { kind, name, file, lineStart, detail: rawLine.trim(), confidence, evidenceType: mapKindToEvidenceType(kind) }; +} diff --git a/src/wiki-engine/code-knowledge/extractors/python.ts b/src/wiki-engine/code-knowledge/extractors/python.ts new file mode 100644 index 0000000..3397372 --- /dev/null +++ b/src/wiki-engine/code-knowledge/extractors/python.ts @@ -0,0 +1,126 @@ +import { type CodeCollectedFile } from "../code-collector.js"; +import { type CodeFact, type CodeFactKind, mapKindToEvidenceType } from "../code-extractors.js"; + +/** + * Python extractor. + * Extracts classes, module-level functions, ABC interfaces, route decorators, + * configs, errors, and import relations. + */ +export function extractPython(files: CodeCollectedFile[]): CodeFact[] { + const facts: CodeFact[] = []; + + for (const file of files) { + const lines = file.content.split(/\r?\n/); + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lineNumber = i + 1; + + // --- Components --- + const classDecl = /^class\s+([A-Z][A-Za-z0-9_]*)\s*[:(]/u.exec(line); + if (classDecl && !isABCClass(line) && !isExceptionClass(line)) { + facts.push(makeFact("component", classDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // Module-level function (not indented) + const funcDecl = /^(?:async\s+)?def\s+([a-z_][a-z0-9_]*)\s*\(/u.exec(line); + if (funcDecl) { + facts.push(makeFact("component", funcDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // --- Interfaces --- + if (isABCClass(line)) { + const abcClass = /^class\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (abcClass) { + facts.push(makeFact("interface", abcClass[1], file.relativePath, 
lineNumber, line, "EXTRACTED")); + } + } + + // Flask/FastAPI route decorators + const flaskRoute = /@app\.route\s*\(\s*["'](\/[^"']*)/u.exec(line); + if (flaskRoute) { + facts.push(makeFact("interface", flaskRoute[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const fastapiRoute = /@(?:router|app)\.\s*(get|post|put|patch|delete)\s*\(\s*["'](\/[^"']*)/u.exec(line); + if (fastapiRoute) { + facts.push(makeFact("interface", `${fastapiRoute[1].toUpperCase()} ${fastapiRoute[2]}`, file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // Protocol class (typing) + const protocolClass = /^class\s+([A-Z][A-Za-z0-9_]*)\s*\(.*Protocol.*\)/u.exec(line); + if (protocolClass) { + facts.push(makeFact("interface", protocolClass[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // --- Configs --- + const osEnviron = /os\.environ\s*(?:\[["']|\.get\s*\(\s*["'])([A-Z][A-Z0-9_]+)/u.exec(line); + if (osEnviron) { + facts.push(makeFact("config", osEnviron[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const dotenvRead = /(?:config|settings|environ)\s*(?:\[["']|\.get\s*\(\s*["']|\.)\s*([A-Z][A-Z0-9_]{2,})/u.exec(line); + if (dotenvRead && !osEnviron) { + facts.push(makeFact("config", dotenvRead[1], file.relativePath, lineNumber, line, "INFERRED")); + } + + // Settings patterns (e.g., SETTING_NAME = ...) 
+ const settingsPattern = /^([A-Z][A-Z0-9_]{3,})\s*[:=]\s*.+/u.exec(line); + if (settingsPattern && isSettingsFile(file.relativePath)) { + facts.push(makeFact("config", settingsPattern[1], file.relativePath, lineNumber, line, "INFERRED")); + } + + // --- Errors --- + if (isExceptionClass(line)) { + const errClass = /^class\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (errClass) { + facts.push(makeFact("error", errClass[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + } + + const raiseStmt = /raise\s+([A-Z][A-Za-z0-9_]*(?:Error|Exception)?)\s*\(/u.exec(line); + if (raiseStmt) { + facts.push(makeFact("error", raiseStmt[1], file.relativePath, lineNumber, line, "INFERRED")); + } + + // --- Relations --- + const fromImport = /^from\s+([\w.]+)\s+import\s+(.+)/u.exec(line); + if (fromImport) { + const modulePath = fromImport[1]; + const names = fromImport[2].split(",").map((n) => n.trim().split(/\s+as\s+/)[0].trim()).filter(Boolean); + for (const name of names) { + facts.push(makeFact("relation", `${modulePath}.${name}`, file.relativePath, lineNumber, line, "EXTRACTED")); + } + } + + const importModule = /^import\s+([\w.]+)/u.exec(line); + if (importModule && !fromImport) { + facts.push(makeFact("relation", importModule[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + } + } + + return facts; +} + +function isABCClass(line: string): boolean { + return /^class\s+\w+\s*\(.*(?:ABC|ABCMeta|metaclass\s*=\s*ABCMeta).*\)/u.test(line); +} + +function isExceptionClass(line: string): boolean { + return /^class\s+\w+\s*\(.*(?:Exception|Error|BaseException).*\)/u.test(line); +} + +function isSettingsFile(relativePath: string): boolean { + return /(?:settings|config|constants|env)\.py$/iu.test(relativePath); +} + +function makeFact( + kind: CodeFactKind, + name: string, + file: string, + lineStart: number, + rawLine: string, + confidence: CodeFact["confidence"] +): CodeFact { + return { kind, name, file, lineStart, detail: rawLine.trim(), confidence, 
evidenceType: mapKindToEvidenceType(kind) }; +} diff --git a/src/wiki-engine/code-knowledge/extractors/rust.ts b/src/wiki-engine/code-knowledge/extractors/rust.ts new file mode 100644 index 0000000..7a71118 --- /dev/null +++ b/src/wiki-engine/code-knowledge/extractors/rust.ts @@ -0,0 +1,143 @@ +import { type CodeCollectedFile } from "../code-collector.js"; +import { type CodeFact, type CodeFactKind, mapKindToEvidenceType } from "../code-extractors.js"; + +/** + * Rust extractor. + * Extracts structs, impls, modules, traits, HTTP handlers, configs, errors, and use relations. + */ +export function extractRust(files: CodeCollectedFile[]): CodeFact[] { + const facts: CodeFact[] = []; + + for (const file of files) { + const lines = file.content.split(/\r?\n/); + let pendingAttributes: string[] = []; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lineNumber = i + 1; + + // Collect attributes for context + const attrMatch = /^\s*#\[([^\]]+)\]/u.exec(line); + if (attrMatch) { + pendingAttributes.push(attrMatch[1]); + // Don't continue — attribute line might also contain other patterns + } + + // --- Components --- + const pubStruct = /^pub(?:\(crate\))?\s+struct\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (pubStruct) { + facts.push(makeFact("component", pubStruct[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const implBlock = /^impl(?:<[^>]*>)?\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (implBlock && !/\bfor\b/u.test(line)) { + facts.push(makeFact("component", `impl:${implBlock[1]}`, file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const modDecl = /^pub(?:\(crate\))?\s+mod\s+([a-z][a-z0-9_]*)/u.exec(line); + if (modDecl) { + facts.push(makeFact("component", `mod:${modDecl[1]}`, file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const privateMod = /^mod\s+([a-z][a-z0-9_]*)\s*;/u.exec(line); + if (privateMod) { + facts.push(makeFact("component", `mod:${privateMod[1]}`, file.relativePath, lineNumber, line, 
"INFERRED")); + } + + const pubFn = /^pub(?:\(crate\))?\s+(?:async\s+)?fn\s+([a-z_][a-z0-9_]*)/u.exec(line); + if (pubFn) { + facts.push(makeFact("component", pubFn[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // --- Interfaces --- + const traitDecl = /^pub(?:\(crate\))?\s+trait\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (traitDecl) { + facts.push(makeFact("interface", traitDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // Trait impl (impl Trait for Type) + const traitImpl = /^impl(?:<[^>]*>)?\s+([A-Z][A-Za-z0-9_]*)\s+for\s+([A-Z][A-Za-z0-9_]*)/u.exec(line); + if (traitImpl) { + facts.push(makeFact("interface", `${traitImpl[2]}:impl:${traitImpl[1]}`, file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // Actix/Axum HTTP handlers: #[get("/")] async fn handler + const httpAttr = pendingAttributes.find((a) => /^(?:get|post|put|patch|delete)\s*\(/iu.test(a)); + if (httpAttr && pubFn) { + const routePath = /\(\s*["'](\/[^"']*)/u.exec(httpAttr); + if (routePath) { + facts.push(makeFact("interface", `${httpAttr.split("(")[0].toUpperCase()} ${routePath[1]}`, file.relativePath, lineNumber, line, "EXTRACTED")); + } + } + + // Router registrations: .route("/path", get(handler)) + const routeReg = /\.route\s*\(\s*["'](\/[^"']*)/u.exec(line); + if (routeReg) { + facts.push(makeFact("interface", routeReg[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // --- Configs --- + const stdEnvVar = /std::env::var\s*\(\s*["']([A-Z][A-Z0-9_]+)["']\s*\)/u.exec(line); + if (stdEnvVar) { + facts.push(makeFact("config", stdEnvVar[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const envVar = /env::var\s*\(\s*["']([A-Z][A-Z0-9_]+)["']\s*\)/u.exec(line); + if (envVar && !stdEnvVar) { + facts.push(makeFact("config", envVar[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // Config structs in config.rs files + if (isConfigFile(file.relativePath) && pubStruct) { + facts.push(makeFact("config", 
`config:${pubStruct[1]}`, file.relativePath, lineNumber, line, "INFERRED")); + } + + // --- Errors --- + const thiserror = pendingAttributes.some((a) => /derive\(.*thiserror::Error/u.test(a) || /derive\(.*Error/u.test(a)); + const errorEnum = /^pub(?:\(crate\))?\s+enum\s+([A-Z][A-Za-z0-9_]*(?:Error)?)/u.exec(line); + if (errorEnum && thiserror) { + facts.push(makeFact("error", errorEnum[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } else if (errorEnum && /Error$/u.test(errorEnum[1])) { + facts.push(makeFact("error", errorEnum[1], file.relativePath, lineNumber, line, "INFERRED")); + } + + const errorStruct = /^pub(?:\(crate\))?\s+struct\s+([A-Z][A-Za-z0-9_]*Error)\b/u.exec(line); + if (errorStruct) { + facts.push(makeFact("error", errorStruct[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // --- Relations --- + const useDecl = /^use\s+([a-z_][\w:]*(?:::\{[^}]+\}|::\*|::[A-Z]\w*))/u.exec(line); + if (useDecl) { + facts.push(makeFact("relation", useDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + const externCrate = /^extern\s+crate\s+([a-z_][a-z0-9_]*)/u.exec(line); + if (externCrate) { + facts.push(makeFact("relation", externCrate[1], file.relativePath, lineNumber, line, "EXTRACTED")); + } + + // Reset attributes on non-attribute, non-blank lines + if (!attrMatch && line.trim().length > 0) { + pendingAttributes = []; + } + } + } + + return facts; +} + +function isConfigFile(relativePath: string): boolean { + return /(?:config|settings)\.rs$/iu.test(relativePath); +} + +function makeFact( + kind: CodeFactKind, + name: string, + file: string, + lineStart: number, + rawLine: string, + confidence: CodeFact["confidence"] +): CodeFact { + return { kind, name, file, lineStart, detail: rawLine.trim(), confidence, evidenceType: mapKindToEvidenceType(kind) }; +} diff --git a/src/wiki-engine/code-knowledge/extractors/typescript.ts b/src/wiki-engine/code-knowledge/extractors/typescript.ts new file mode 100644 index 
// 0000000..7c3c566
// --- /dev/null
// +++ b/src/wiki-engine/code-knowledge/extractors/typescript.ts
// @@ -0,0 +1,102 @@
import { type CodeCollectedFile } from "../code-collector.js";
import { type CodeFact, type CodeFactKind, mapKindToEvidenceType } from "../code-extractors.js";

/**
 * Enhanced TypeScript/JavaScript extractor.
 * Extracts components, interfaces/types, configs, errors, and relations.
 *
 * The scan is line-based; export patterns are anchored at column 0, so only
 * top-level, single-line declarations are recognised.
 *
 * @param files Collected files; only `content` and `relativePath` are read.
 * @returns Facts in file order, each carrying a 1-based line number.
 */
export function extractTypescript(files: CodeCollectedFile[]): CodeFact[] {
  const facts: CodeFact[] = [];

  for (const file of files) {
    const lines = file.content.split(/\r?\n/);
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      const lineNumber = i + 1;

      // --- Components ---
      const exportClass = /^export\s+(?:default\s+)?(?:abstract\s+)?class\s+([A-Za-z_$][\w$]*)/u.exec(line);
      if (exportClass) {
        facts.push(makeFact("component", exportClass[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      const exportFunction = /^export\s+(?:default\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)/u.exec(line);
      if (exportFunction) {
        facts.push(makeFact("component", exportFunction[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // Config-looking constants are reported under Configs instead.
      const exportConst = /^export\s+const\s+([A-Za-z_$][\w$]*)\s*=/u.exec(line);
      if (exportConst && !/CONFIG|DEFAULT|OPTION|SETTING|ENV/u.test(exportConst[1])) {
        facts.push(makeFact("component", exportConst[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // `export default <identifier>`. The `async\b` exclusion keeps
      // `export default async function foo` / `export default async () =>`
      // from being reported as a component literally named "async".
      const exportDefault = /^export\s+default\s+(?!class|function|abstract|async\b)([A-Za-z_$][\w$]*)/u.exec(line);
      if (exportDefault) {
        facts.push(makeFact("component", exportDefault[1], file.relativePath, lineNumber, line, "INFERRED"));
      }

      // --- Interfaces / Types ---
      const iface = /^export\s+(?:declare\s+)?interface\s+([A-Za-z_$][\w$]*)/u.exec(line);
      if (iface) {
        facts.push(makeFact("interface", iface[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      const typeAlias = /^export\s+(?:declare\s+)?type\s+([A-Za-z_$][\w$]*)\s*[=<]/u.exec(line);
      if (typeAlias) {
        facts.push(makeFact("interface", typeAlias[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // Route definitions
      const route = /(?:router|app|server)\.\s*(get|post|put|patch|delete|all|use)\s*\(\s*["'`](\/[^"'`]*)/iu.exec(line);
      if (route) {
        facts.push(makeFact("interface", `${route[1].toUpperCase()} ${route[2]}`, file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // --- Configs ---
      const envVar = /process\.env\.([A-Z][A-Z0-9_]{2,})/u.exec(line);
      if (envVar) {
        facts.push(makeFact("config", `process.env.${envVar[1]}`, file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      const configConst = /^export\s+const\s+([A-Z][A-Z0-9_]*(?:CONFIG|DEFAULT|OPTION|SETTING|ENV)[A-Z0-9_]*)\s*=/u.exec(line);
      if (configConst) {
        facts.push(makeFact("config", configConst[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // --- Errors ---
      const throwNew = /throw\s+new\s+([A-Za-z_$][\w$]*Error)\b/u.exec(line);
      if (throwNew) {
        facts.push(makeFact("error", throwNew[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      // Upper-case identifiers containing ERROR/ERR/FAILED/FAILURE after their first character.
      const errorConst = /\b([A-Z][A-Z0-9_]*(?:ERROR|ERR|FAILED|FAILURE)[A-Z0-9_]*)\b/u.exec(line);
      if (errorConst && !throwNew) {
        facts.push(makeFact("error", errorConst[1], file.relativePath, lineNumber, line, "INFERRED"));
      }

      // --- Relations ---
      const importFrom = /^import\s+.*?from\s+["']([^"']+)["']/u.exec(line);
      if (importFrom) {
        facts.push(makeFact("relation", importFrom[1], file.relativePath, lineNumber, line, "EXTRACTED"));
      }

      const dynamicImport = /(?:await\s+)?import\s*\(\s*["']([^"']+)["']\s*\)/u.exec(line);
      if (dynamicImport && !importFrom) {
        facts.push(makeFact("relation", dynamicImport[1], file.relativePath, lineNumber, line, "INFERRED"));
      }
    }
  }

  return facts;
}

/**
 * Builds a CodeFact. `detail` is the trimmed source line and `evidenceType`
 * is derived from `kind` via mapKindToEvidenceType.
 */
function makeFact(
  kind: CodeFactKind,
  name: string,
  file: string,
  lineStart: number,
  rawLine: string,
  confidence: CodeFact["confidence"]
): CodeFact {
  return { kind, name, file, lineStart, detail: rawLine.trim(), confidence, evidenceType: mapKindToEvidenceType(kind) };
}

// diff --git a/src/wiki-engine/core/graph-index.schema.ts b/src/wiki-engine/core/graph-index.schema.ts
// new file mode 100644
// index 0000000..b6ec260
// --- /dev/null
// +++ b/src/wiki-engine/core/graph-index.schema.ts
// @@ -0,0 +1,418 @@
import { readFile, writeFile, mkdir } from "node:fs/promises";
import path from "node:path";

import { CONFIDENCE_SCORE_DEFAULTS, type WikiCategory, type WikiConfidence, type WikiEvidence } from "./wiki-protocol.js";

/**
 * Graph Index Schema — team-wiki.graph-index.v1
 *
 * Formal schema for knowledge graph indices that capture
 * relationships between wiki pages and code entities.
 */

export const GRAPH_INDEX_SCHEMA_VERSION = "team-wiki.graph-index.v1" as const;

export type RelationType =
  | "DEPENDS_ON"
  | "IMPLEMENTS"
  | "MAPS_TO"
  | "CONTAINS"
  | "REFERENCES"
  | "CONFLICTS_WITH"
  | "SUPERSEDES";

/** Runtime list of every RelationType member, in declaration order. */
export const RELATION_TYPES: RelationType[] = [
  "DEPENDS_ON",
  "IMPLEMENTS",
  "MAPS_TO",
  "CONTAINS",
  "REFERENCES",
  "CONFLICTS_WITH",
  "SUPERSEDES"
];

export interface GraphNode {
  slug: string;
  type: WikiCategory;
  confidence: WikiConfidence;
  title: string;
  domain?: string;
}

/** Provenance of a graph edge (compile / reconcile pipeline). */
export type GraphEdgeSource =
  | "code-ast"
  | "code-heuristic"
  | "doc-structure"
  | "doc-entity"
  | "doc-triples"
  | "bridge-reconcile"
  | "doc-semantic"
  | "manual-mapping";

export interface GraphEdge {
  from: string;
  to: string;
  relation: RelationType;
  evidence?: WikiEvidence[];
  weight?: number;
  /** Fine-grained semantic predicate (e.g. G6 CALLS_HTTP, USES_TABLE). */
  predicate?: string;
  source?: GraphEdgeSource;
}

/** Wiki page slug: relative path without `.md`.
*/ +export function toPageSlug(relativePath: string): string { + return relativePath.replace(/\.md$/u, "").replace(/\\/g, "/"); +} + +export interface GraphIndex { + schemaVersion: typeof GRAPH_INDEX_SCHEMA_VERSION; + generatedAt: string; + nodes: GraphNode[]; + edges: GraphEdge[]; +} + +/** + * Create an empty GraphIndex with the current timestamp. + */ +export function createGraphIndex(nodes: GraphNode[] = [], edges: GraphEdge[] = []): GraphIndex { + return { + schemaVersion: GRAPH_INDEX_SCHEMA_VERSION, + generatedAt: new Date().toISOString(), + nodes, + edges, + }; +} + +/** + * Add a node to the graph index. If a node with the same slug already exists, + * it is replaced with the new node. + */ +export function addNode(graph: GraphIndex, node: GraphNode): GraphIndex { + const filtered = graph.nodes.filter((n) => n.slug !== node.slug); + return { ...graph, nodes: [...filtered, node] }; +} + +/** + * Add an edge to the graph index. Duplicate edges (same from, to, relation) are not added. + */ +export function addEdge(graph: GraphIndex, edge: GraphEdge): GraphIndex { + const exists = graph.edges.some( + (e) => e.from === edge.from && e.to === edge.to && e.relation === edge.relation + ); + if (exists) { + return graph; + } + return { ...graph, edges: [...graph.edges, edge] }; +} + +/** + * Add an edge using confidence level as weight when no explicit weight is provided. + * Falls back to CONFIDENCE_SCORE_DEFAULTS for the given confidence level. + */ +export function addEdgeWithConfidence( + graph: GraphIndex, + edge: Omit & { weight?: number }, + confidence: WikiConfidence +): GraphIndex { + const weight = edge.weight ?? CONFIDENCE_SCORE_DEFAULTS[confidence]; + return addEdge(graph, { ...edge, weight }); +} + +/** + * Find all neighbor slugs of a given node (connected via any edge direction). 
+ */ +export function findNeighbors(graph: GraphIndex, slug: string): string[] { + const neighbors = new Set(); + for (const edge of graph.edges) { + if (edge.from === slug) { + neighbors.add(edge.to); + } + if (edge.to === slug) { + neighbors.add(edge.from); + } + } + return [...neighbors].sort(); +} + +/** + * Find all neighbor slugs reachable within N hops. + * Optionally filter by specific relation types. + * Uses BFS to expand outward from the starting node. + */ +export function findNeighborsNHop( + graph: GraphIndex, + slug: string, + hops: number, + filterRelations?: RelationType[] +): string[] { + const visited = new Set([slug]); + let frontier = new Set([slug]); + + for (let hop = 0; hop < hops; hop++) { + const nextFrontier = new Set(); + for (const current of frontier) { + for (const edge of graph.edges) { + if (filterRelations && !filterRelations.includes(edge.relation)) { + continue; + } + let neighbor: string | null = null; + if (edge.from === current && !visited.has(edge.to)) { + neighbor = edge.to; + } else if (edge.to === current && !visited.has(edge.from)) { + neighbor = edge.from; + } + if (neighbor) { + visited.add(neighbor); + nextFrontier.add(neighbor); + } + } + } + frontier = nextFrontier; + if (frontier.size === 0) break; + } + + visited.delete(slug); // Remove starting node from results + return [...visited].sort(); +} + +export interface GraphValidationIssue { + code: "node.duplicate" | "edge.missing_node" | "edge.self_loop" | "edge.invalid_weight"; + message: string; +} + +export interface GraphValidationResult { + valid: boolean; + issues: GraphValidationIssue[]; +} + +/** + * Validate a graph index for structural correctness: + * - No duplicate node slugs + * - All edge endpoints reference existing nodes + * - No self-loop edges + * - Edge weights (if provided) are between 0 and 1 + */ +export function validateGraph(graph: GraphIndex): GraphValidationResult { + const issues: GraphValidationIssue[] = []; + const slugs = new Set(); + + 
for (const node of graph.nodes) { + if (slugs.has(node.slug)) { + issues.push({ + code: "node.duplicate", + message: `Duplicate node slug: ${node.slug}`, + }); + } + slugs.add(node.slug); + } + + for (const edge of graph.edges) { + if (!slugs.has(edge.from)) { + issues.push({ + code: "edge.missing_node", + message: `Edge references non-existent source node: ${edge.from}`, + }); + } + if (!slugs.has(edge.to)) { + issues.push({ + code: "edge.missing_node", + message: `Edge references non-existent target node: ${edge.to}`, + }); + } + if (edge.from === edge.to) { + issues.push({ + code: "edge.self_loop", + message: `Self-loop edge on node: ${edge.from}`, + }); + } + if (edge.weight !== undefined && (edge.weight < 0 || edge.weight > 1)) { + issues.push({ + code: "edge.invalid_weight", + message: `Edge weight out of range [0,1]: ${edge.from} -> ${edge.to} (${edge.weight})`, + }); + } + } + + return { valid: issues.length === 0, issues }; +} + +/** + * Graph Health Metrics — a summary of overall graph quality. + */ +export interface GraphHealthMetrics { + healthScore: number; // 0-100 + connectivity: number; // largest connected component / total nodes (0-1) + density: number; // edges / nodes ratio + freshness: number; // nodes with usable status / total (0-1) + confidenceRatio: number; // edges with weight >= 0.8 / total edges (0-1) + nodeCount: number; + edgeCount: number; + orphanNodes: number; // nodes with no edges + brokenEdges: number; // edges referencing non-existent nodes +} + +/** + * Compute health metrics for a graph index. + * + * - connectivity: BFS from first node, count reachable / total + * - density: edges.length / max(nodes.length, 1) + * - freshness: simplified — nodeCount > 0 ? 
/**
 * Compute health metrics for a graph index.
 *
 * - connectivity: share of nodes reachable from the FIRST node (edges treated as
 *   undirected, broken edges ignored); 0 for an empty graph
 * - density: edges.length / max(nodes.length, 1)
 * - freshness: simplified — 1.0 when the graph has any node, otherwise 0
 * - confidenceRatio: edges with weight >= 0.8 / total edges (missing weight counts as 0)
 * - healthScore = connectivity*30 + (density>1.5 ? 20 : density/1.5*20) + freshness*25 + confidenceRatio*25
 * - orphanNodes: nodes not referenced by any edge endpoint
 * - brokenEdges: edges whose `from` or `to` is not a known node slug
 *
 * @param graph - Graph to measure; it is not mutated.
 * @returns The metrics described above.
 */
export function computeGraphHealth(graph: GraphIndex): GraphHealthMetrics {
  const nodeCount = graph.nodes.length;
  const edgeCount = graph.edges.length;
  const slugSet = new Set(graph.nodes.map((n) => n.slug));

  // Connectivity: breadth-first traversal starting at the first node.
  let connectivity = 0;
  const firstNode = graph.nodes[0];
  if (firstNode !== undefined) {
    const adjacency = new Map<string, Set<string>>();
    for (const node of graph.nodes) {
      adjacency.set(node.slug, new Set<string>());
    }
    for (const edge of graph.edges) {
      // Both lookups succeed exactly when both endpoints are known nodes,
      // so broken edges never contribute to reachability.
      const fromNeighbors = adjacency.get(edge.from);
      const toNeighbors = adjacency.get(edge.to);
      if (fromNeighbors && toNeighbors) {
        fromNeighbors.add(edge.to);
        toNeighbors.add(edge.from);
      }
    }

    const visited = new Set<string>([firstNode.slug]);
    const queue: string[] = [firstNode.slug];
    // `for...of` over an array also visits items pushed during the loop, which
    // gives FIFO order without the linear-time `Array.prototype.shift()`.
    for (const current of queue) {
      for (const neighbor of adjacency.get(current) ?? []) {
        if (!visited.has(neighbor)) {
          visited.add(neighbor);
          queue.push(neighbor);
        }
      }
    }
    connectivity = visited.size / nodeCount;
  }

  const density = edgeCount / Math.max(nodeCount, 1);

  // Simplified: per-node status is not inspected here.
  const freshness = nodeCount > 0 ? 1.0 : 0;

  let confidenceRatio = 0;
  if (edgeCount > 0) {
    const highConfidenceEdges = graph.edges.filter((e) => (e.weight ?? 0) >= 0.8).length;
    confidenceRatio = highConfidenceEdges / edgeCount;
  }

  const referencedSlugs = new Set<string>();
  for (const edge of graph.edges) {
    referencedSlugs.add(edge.from);
    referencedSlugs.add(edge.to);
  }
  const orphanNodes = graph.nodes.filter((n) => !referencedSlugs.has(n.slug)).length;

  const brokenEdges = graph.edges.filter((e) => !slugSet.has(e.from) || !slugSet.has(e.to)).length;

  // Density contributes at most 20 points and saturates at 1.5 edges per node.
  const densityScore = density > 1.5 ? 20 : (density / 1.5) * 20;
  const healthScore = connectivity * 30 + densityScore + freshness * 25 + confidenceRatio * 25;

  return {
    healthScore,
    connectivity,
    density,
    freshness,
    confidenceRatio,
    nodeCount,
    edgeCount,
    orphanNodes,
    brokenEdges,
  };
}
/**
 * Combine two graphs into a fresh index stamped with the current time.
 *
 * Nodes are keyed by slug (falling back to a legacy `id`, then `title:type`);
 * when both graphs define the same key, the overlay node replaces the base one.
 *
 * Edges are keyed by `from|to|relation`. An overlay edge replaces an existing
 * edge with the same key unless the existing one carries strictly more evidence
 * entries, so richer evidence is never lost and ties go to the overlay.
 *
 * @param base - Graph providing the starting nodes and edges.
 * @param overlay - Graph whose entries take precedence as described above.
 * @returns A new graph; neither input is mutated.
 */
export function mergeGraphs(base: GraphIndex, overlay: GraphIndex): GraphIndex {
  const keyOfNode = (node: GraphNode): string =>
    node.slug ?? (node as unknown as { id?: string }).id ?? `${node.title}:${node.type}`;
  const keyOfEdge = (edge: GraphEdge): string => `${edge.from}|${edge.to}|${edge.relation}`;
  const evidenceCount = (edge: GraphEdge): number => edge.evidence?.length ?? 0;

  // Base first, overlay second: a later `set` keeps the slot but swaps the value.
  const nodesByKey = new Map<string, GraphNode>();
  for (const node of [...base.nodes, ...overlay.nodes]) {
    nodesByKey.set(keyOfNode(node), node);
  }

  const edgesByKey = new Map<string, GraphEdge>();
  for (const edge of base.edges) {
    edgesByKey.set(keyOfEdge(edge), edge);
  }
  for (const candidate of overlay.edges) {
    const key = keyOfEdge(candidate);
    const current = edgesByKey.get(key);
    const overlayWins = !current || evidenceCount(candidate) >= evidenceCount(current);
    if (overlayWins) {
      edgesByKey.set(key, candidate);
    }
  }

  return {
    schemaVersion: GRAPH_INDEX_SCHEMA_VERSION,
    generatedAt: new Date().toISOString(),
    nodes: Array.from(nodesByKey.values()),
    edges: Array.from(edgesByKey.values()),
  };
}
/** Path segments that mark a directory tree as off-limits. */
const SAFE_IGNORE_SEGMENTS = new Set<string>([
  ".git",
  ".teamwiki",
  "node_modules",
  "dist",
  "build",
  ".venv",
  "venv",
  "coverage",
  ".next",
  ".turbo"
]);

/** Exact file names that are always treated as sensitive. */
const SENSITIVE_FILE_NAMES = new Set<string>(["credentials.json"]);

/**
 * Decide whether a path must be skipped.
 *
 * A path is ignored when any of its segments is in `SAFE_IGNORE_SEGMENTS`, when
 * its file name starts with `.env` or is listed in `SENSITIVE_FILE_NAMES`, or
 * when the file name ends in `.pem`, `.key`, `.p12` or `.pfx` (any letter case).
 * Paths that start with `.teamwiki/evidence/` are never ignored, even though
 * `.teamwiki` is otherwise an ignored segment.
 *
 * @param filePath - Path to test; separators are normalised via `toPosix` first.
 * @returns `true` when the path should be ignored.
 */
export function safeIgnore(filePath: string): boolean {
  const posixPath = toPosix(filePath);

  // Compiled code evidence pages live here and must stay writable.
  if (posixPath.startsWith(".teamwiki/evidence/")) {
    return false;
  }

  const segments = posixPath.split("/").filter((segment) => segment.length > 0);
  for (const segment of segments) {
    if (SAFE_IGNORE_SEGMENTS.has(segment)) {
      return true;
    }
  }

  const fileName = segments.length > 0 ? segments[segments.length - 1] : "";
  const hasSensitiveName = fileName.startsWith(".env") || SENSITIVE_FILE_NAMES.has(fileName);
  return hasSensitiveName || /\.(pem|key|p12|pfx)$/i.test(fileName);
}
/**
 * Collect the targets of every `[[wiki link]]` in a document, in order of
 * appearance. Each target is trimmed; targets that are empty after trimming
 * are dropped. Duplicates are kept.
 */
function extractWikiLinks(content: string): string[] {
  return Array.from(content.matchAll(/\[\[([^\]]+)\]\]/g))
    .map((found) => found[1].trim())
    .filter((target) => target.length > 0);
}
/**
 * Extract entity nodes mentioned in a document and link each one from the page
 * with a `REFERENCES` edge.
 *
 * Entities are recognised by regular expressions, scanned in this order:
 * 1. HTTP endpoints — a method followed by a path shaped like `/v1/...`
 *    (type `interface`, confidence `EXTRACTED`)
 * 2. error codes — `Err` followed by 3–8 digits (type `error`)
 * 3. error-code ranges — two such codes joined by a dash (type `error`)
 * 4. config keys in backticks — `UPPER_SNAKE` names of 3+ characters (type `config`)
 * 5. config keys at the start of a line followed by `:` or `=` (type `config`)
 *
 * Only the first mention of an entity produces a node and an edge; the edge's
 * evidence records the line of that first mention.
 *
 * @param content - Document text to scan.
 * @param pageSlug - Slug of the page; used as the `from` end of every edge.
 * @param pageRelativePath - Path recorded as the evidence `ref`.
 * @returns The entity nodes and page-to-entity edges, passed through `dedupeExtraction`.
 */
export function extractDocEntities(
  content: string,
  pageSlug: string,
  pageRelativePath: string
): DocGraphExtraction {
  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const seenEntitySlugs = new Set<string>();

  const apiPattern = /(GET|POST|PUT|DELETE|PATCH)\s+(\/v?\d*\/[a-z0-9/_\-{}:.]+)/gi;
  let match: RegExpExecArray | null;
  while ((match = apiPattern.exec(content)) !== null) {
    const method = match[1].toUpperCase();
    const apiPath = match[2].toLowerCase();
    const entitySlug = entitySlugFor("api", `${method}-${apiPath}`);
    addEntity(entitySlug, "interface", `${method} ${apiPath}`, match.index);
  }

  const errPattern = /\b(Err\d{3,8})\b/gi;
  while ((match = errPattern.exec(content)) !== null) {
    const code = match[1];
    addEntity(entitySlugFor("error", code.toLowerCase()), "error", code, match.index);
  }

  const errRangePattern = /\b(Err\d{3,8})\s*[-–—]\s*(Err\d{3,8})\b/gi;
  while ((match = errRangePattern.exec(content)) !== null) {
    const rangeLabel = `${match[1]}-${match[2]}`;
    addEntity(entitySlugFor("error-range", rangeLabel.toLowerCase()), "error", rangeLabel, match.index);
  }

  const configBacktickPattern = /`([A-Z][A-Z0-9_]{2,})`/g;
  while ((match = configBacktickPattern.exec(content)) !== null) {
    const key = match[1];
    addEntity(entitySlugFor("config", key.toLowerCase()), "config", key, match.index);
  }

  const configAssignPattern = /(?:^|\n)\s*([a-z][a-z0-9_.-]{2,})\s*[:=]\s*/gim;
  while ((match = configAssignPattern.exec(content)) !== null) {
    const key = match[1];
    if (/^(http|https|get|post|put|delete|patch)$/i.test(key)) {
      continue;
    }
    // The match may begin with the preceding newline and any blank lines, so
    // `match.index` can sit on an earlier line than the key. Everything before
    // the key inside the match is whitespace, so `indexOf` finds the key itself.
    const keyIndex = match.index + match[0].indexOf(key);
    addEntity(entitySlugFor("config", key.toLowerCase()), "config", key, keyIndex);
  }

  return dedupeExtraction({ nodes, edges });

  /** Register an entity on first sight; later mentions of the same slug are ignored. */
  function addEntity(entitySlug: string, type: WikiCategory, title: string, index: number): void {
    // A seen slug already has its node and its page -> entity edge (both are
    // pushed together below), so there is nothing to add for a repeat mention.
    if (seenEntitySlugs.has(entitySlug)) {
      return;
    }
    seenEntitySlugs.add(entitySlug);
    nodes.push({
      slug: entitySlug,
      type,
      confidence: type === "interface" ? "EXTRACTED" : "INFERRED",
      title,
      domain: "product"
    });
    edges.push({
      from: pageSlug,
      to: entitySlug,
      relation: "REFERENCES",
      weight: type === "interface" ? CONFIDENCE_SCORE_DEFAULTS.EXTRACTED : CONFIDENCE_SCORE_DEFAULTS.INFERRED,
      evidence: docEvidence(pageRelativePath, lineNumberAt(content, index), "doc-entity")
    });
  }
}
/**
 * Remove duplicate nodes and edges from an extraction.
 *
 * Nodes are unique per slug: a later node with the same slug replaces the
 * earlier one but keeps the earlier position. Edges are unique per
 * `from|to|relation`: the first occurrence is kept and later ones are dropped.
 */
function dedupeExtraction(extraction: DocGraphExtraction): DocGraphExtraction {
  const nodesBySlug = new Map<string, GraphNode>(
    extraction.nodes.map((node): [string, GraphNode] => [node.slug, node])
  );

  const seenEdgeKeys = new Set<string>();
  const uniqueEdges = extraction.edges.filter((edge) => {
    const key = `${edge.from}|${edge.to}|${edge.relation}`;
    const isNew = !seenEdgeKeys.has(key);
    seenEdgeKeys.add(key);
    return isNew;
  });

  return { nodes: Array.from(nodesBySlug.values()), edges: uniqueEdges };
}
type: "HTTP", regex: /@router\.(?:get|post|put|delete|patch)\s*\(/u, languages: ["python"], confidence: "HIGH" }, + { type: "HTTP", regex: /APIRouter\s*\(/u, languages: ["python"], confidence: "MEDIUM" }, + + // HTTP - Java + { type: "HTTP", regex: /@(?:Get|Post|Put|Delete|Patch)Mapping\b/u, languages: ["java"], confidence: "HIGH" }, + { type: "HTTP", regex: /@RequestMapping\b/u, languages: ["java"], confidence: "HIGH" }, + + // HTTP - TypeScript/JavaScript + { type: "HTTP", regex: /(?:router|app)\.\s*(?:get|post|put|delete|patch|use)\s*\(/u, languages: ["typescript", "javascript"], confidence: "HIGH" }, + { type: "HTTP", regex: /@(?:Get|Post|Put|Delete|Patch)\s*\(/u, languages: ["typescript", "javascript"], confidence: "HIGH" }, + + // MQ - cross-language + { type: "MQ", regex: /\.subscribe\s*\(/u, languages: ["typescript", "javascript", "python", "go", "java"], confidence: "MEDIUM" }, + { type: "MQ", regex: /\.consume\s*\(/u, languages: ["typescript", "javascript", "python", "go", "java"], confidence: "MEDIUM" }, + { type: "MQ", regex: /Exchange\s*[({]/u, languages: ["typescript", "javascript", "python", "go", "java"], confidence: "LOW" }, + { type: "MQ", regex: /Topic\s*[({]/u, languages: ["typescript", "javascript", "python", "go", "java"], confidence: "LOW" }, + { type: "MQ", regex: /@KafkaListener\b/u, languages: ["java"], confidence: "HIGH" }, + { type: "MQ", regex: /channel\.consume\s*\(/u, languages: ["typescript", "javascript", "python"], confidence: "HIGH" }, + + // RPC - proto files (language: text for .proto) + { type: "RPC", regex: /^\s*rpc\s+\w+/u, languages: ["text", "proto"], confidence: "HIGH" }, + { type: "RPC", regex: /^\s*service\s+\w+\s*\{/u, languages: ["text", "proto"], confidence: "HIGH" }, + { type: "RPC", regex: /grpc\.NewServer\s*\(/u, languages: ["go"], confidence: "HIGH" }, + { type: "RPC", regex: /@GrpcMethod\s*\(/u, languages: ["typescript", "javascript"], confidence: "HIGH" }, + { type: "RPC", regex: /registerService\s*\(/u, 
/**
 * Build an interface inventory from collected files.
 *
 * Files are grouped into components by `groupByComponent`; every pattern hit
 * reported by `detectInterfaces` for a component is then tallied per interface
 * type. Each (component, type) pair yields one entry carrying the hit count,
 * the highest confidence among its hits, and up to five matched lines.
 * Components without hits yield no entries.
 *
 * @param files - Collected source files to scan.
 * @returns Entries sorted by component, then type, plus the scan timestamp.
 */
export async function scanInterfaces(files: CodeCollectedFile[]): Promise<InterfaceInventory> {
  const entries: InterfaceInventoryEntry[] = [];

  for (const [component, componentFiles] of groupByComponent(files)) {
    const tally = new Map<
      InterfaceType,
      { count: number; confidence: "HIGH" | "MEDIUM" | "LOW"; patterns: string[] }
    >();

    for (const hit of detectInterfaces(componentFiles)) {
      const bucket = tally.get(hit.type);
      if (bucket === undefined) {
        tally.set(hit.type, { count: 1, confidence: hit.confidence, patterns: [hit.line] });
        continue;
      }
      bucket.count += 1;
      bucket.confidence = higherConfidence(bucket.confidence, hit.confidence);
      // Keep only the first five sample lines per type.
      if (bucket.patterns.length < 5) {
        bucket.patterns.push(hit.line);
      }
    }

    for (const [type, { count, confidence, patterns }] of tally) {
      entries.push({ component, type, count, confidence, patterns });
    }
  }

  entries.sort(
    (left, right) => left.component.localeCompare(right.component) || left.type.localeCompare(right.type)
  );

  return { entries, scannedAt: new Date().toISOString() };
}
/**
 * Bucket files into logical components.
 *
 * The component name is derived from the file's `relativePath`:
 * - with a `repo`: `repo/<first path segment>` for nested files, else `repo`
 * - without a `repo`: the first path segment for nested files, else the name
 *   of the directory that contains `file.path`
 *
 * @returns Map from component name to its files, in first-seen order.
 */
function groupByComponent(files: CodeCollectedFile[]): Map<string, CodeCollectedFile[]> {
  const grouped = new Map<string, CodeCollectedFile[]>();

  for (const file of files) {
    const [topSegment, ...deeperSegments] = file.relativePath.split("/");
    const isNested = deeperSegments.length > 0;

    let component: string;
    if (isNested) {
      component = file.repo ? `${file.repo}/${topSegment}` : topSegment;
    } else {
      component = file.repo ? file.repo : path.basename(path.dirname(file.path));
    }

    const bucket = grouped.get(component);
    if (bucket === undefined) {
      grouped.set(component, [file]);
    } else {
      bucket.push(file);
    }
  }

  return grouped;
}
/**
 * Map a code fact to the interface type it represents.
 *
 * A fact whose name looks like a route ("GET /api/users": an upper-case HTTP
 * verb or ALL, whitespace, then a slash) is HTTP. Otherwise the fact's detail
 * is tested against every detection rule regardless of language, and the first
 * rule that matches decides the type. With no match the result is "NONE".
 */
function classifyFactAsInterfaceType(fact: CodeFact): InterfaceType {
  const looksLikeRoute = /^(GET|POST|PUT|DELETE|PATCH|ALL)\s+\//u.test(fact.name);
  if (looksLikeRoute) {
    return "HTTP";
  }

  const firstMatchingRule = DETECTION_RULES.find((rule) => rule.regex.test(fact.detail));
  return firstMatchingRule ? firstMatchingRule.type : "NONE";
}
/**
 * Merge fact-derived and file-scan-derived inventory entries.
 *
 * Entries are matched on `component::type`. Fact entries are taken first.
 * A file entry with no fact counterpart is added as-is. When both exist, the
 * fact entry is kept unless the file scan found strictly more hits; in that
 * case the count is raised to the file count, the confidence becomes the
 * higher of the two, and the sample patterns are the de-duplicated union
 * (fact patterns first) capped at five.
 *
 * @returns Merged entries sorted by component, then type.
 */
function mergeInventories(
  factEntries: InterfaceInventoryEntry[],
  fileEntries: InterfaceInventoryEntry[]
): InterfaceInventoryEntry[] {
  const keyOf = (entry: InterfaceInventoryEntry): string => `${entry.component}::${entry.type}`;
  const byKey = new Map<string, InterfaceInventoryEntry>();

  // Structured extraction is trusted more, so it seeds the map.
  for (const factEntry of factEntries) {
    byKey.set(keyOf(factEntry), factEntry);
  }

  for (const fileEntry of fileEntries) {
    const entryKey = keyOf(fileEntry);
    const current = byKey.get(entryKey);

    if (current === undefined) {
      byKey.set(entryKey, fileEntry);
      continue;
    }
    if (fileEntry.count <= current.count) {
      continue;
    }

    const combinedPatterns = Array.from(new Set([...current.patterns, ...fileEntry.patterns]));
    byKey.set(entryKey, {
      ...current,
      count: fileEntry.count,
      confidence: higherConfidence(current.confidence, fileEntry.confidence),
      patterns: combinedPatterns.slice(0, 5),
    });
  }

  return Array.from(byKey.values()).sort(
    (left, right) => left.component.localeCompare(right.component) || left.type.localeCompare(right.type)
  );
}
/**
 * Type guard that tells a v2 manifest from a v1 manifest.
 *
 * The check is made solely on the `schemaVersion` discriminator; no other
 * field of the manifest is inspected or validated.
 *
 * @param manifest - Manifest of either supported version.
 * @returns `true` (narrowing to `CodebaseOutputManifestV2`) when `schemaVersion`
 *   is exactly "team-wiki.codebase-output-manifest.v2".
 */
export function isManifestV2(manifest: CodebaseOutputManifest): manifest is CodebaseOutputManifestV2 {
  return manifest.schemaVersion === "team-wiki.codebase-output-manifest.v2";
}
/**
 * Heuristically detect areas the extracted knowledge does not cover.
 *
 * Five independent checks run in order; each contributes at most one gap:
 * 1. more than 5 distinct import targets that match no scanned file path
 * 2. more than 3 interfaces with no component of a matching name
 * 3. more than 5 unconnected graph nodes that also exceed 30% of all nodes
 * 4. more than 10 components but no `error` facts
 * 5. more than 10 components but no `config` facts
 *
 * @param facts - Extracted code facts; only `kind` and `name` are read.
 * @param graph - Code graph; node `id`/`file` and edge `from`/`to` are read.
 * @param files - Scanned files; only `relativePath` is read.
 * @returns The detected gaps, in the order of the checks above.
 */
function detectKnowledgeGaps(
  facts: CodeFact[],
  graph: CodeGraphIndex,
  files: Array<{ relativePath: string }>,
): KnowledgeGap[] {
  const gaps: KnowledgeGap[] = [];
  // Built once: every import target below is compared against these paths.
  const scannedPaths = files.map((f) => f.relativePath);
  const connectedNodes = new Set<string>();
  for (const edge of graph.edges) {
    connectedNodes.add(edge.from);
    connectedNodes.add(edge.to);
  }

  // 1. Unresolved external dependencies: the import target matches no scanned file.
  const unresolvedImports = new Set<string>();
  for (const rel of facts) {
    if (rel.kind !== 'relation') continue;
    const target = rel.name;
    if (target.startsWith('.')) continue; // relative imports are skipped
    if (target.startsWith('node:')) continue; // Node built-in modules are skipped
    const needle = target.replace(/\//g, path.sep);
    if (!scannedPaths.some((f) => f.includes(needle))) {
      unresolvedImports.add(target);
    }
  }
  if (unresolvedImports.size > 5) {
    gaps.push({
      id: 'unresolved-external-deps',
      kind: 'EXTERNAL_DEP_UNDOCUMENTED',
      description: `${unresolvedImports.size} 个外部依赖未在知识库中记录(如 ${[...unresolvedImports].slice(0, 3).join(', ')})`,
      source: 'relation facts',
    });
  }

  // 2. Interfaces without an implementation: no component is named like the
  //    interface, like the interface minus a leading "i", or like "<name>impl"
  //    (all compared in lower case).
  const interfaces = facts.filter((f) => f.kind === 'interface');
  const components = facts.filter((f) => f.kind === 'component');
  const componentNames = new Set(components.map((c) => c.name.toLowerCase()));
  const unimplemented: string[] = [];
  for (const iface of interfaces) {
    const name = iface.name.toLowerCase();
    const candidates = [name, name.replace(/^i/, ''), `${name}impl`];
    if (!candidates.some((candidate) => componentNames.has(candidate))) {
      unimplemented.push(iface.name);
    }
  }
  if (unimplemented.length > 3) {
    gaps.push({
      id: 'interface-no-impl',
      kind: 'IMPL_MISSING',
      description: `${unimplemented.length} 个接口未发现对应实现(如 ${unimplemented.slice(0, 3).join(', ')})`,
      source: 'interface facts',
    });
  }

  // 3. Orphan components: nodes that no edge refers to, by id or by file.
  const orphanNodes = graph.nodes.filter(
    (n) => !connectedNodes.has(n.id) && !connectedNodes.has(n.file),
  );
  if (orphanNodes.length > 5 && orphanNodes.length > graph.nodes.length * 0.3) {
    gaps.push({
      id: 'high-orphan-ratio',
      kind: 'LOW_CONNECTIVITY',
      description: `${orphanNodes.length}/${graph.nodes.length} 个节点无图谱连接,依赖关系可能未被完整提取`,
      source: 'graph-index.json',
    });
  }

  // 4. No error-handling patterns: components exist but no error facts were extracted.
  const errorFacts = facts.filter((f) => f.kind === 'error');
  if (components.length > 10 && errorFacts.length === 0) {
    gaps.push({
      id: 'no-error-patterns',
      kind: 'ERROR_HANDLING_UNDOCUMENTED',
      description: `项目有 ${components.length} 个组件但未检测到错误类型定义,错误处理模式可能未文档化`,
      source: 'code scan',
    });
  }

  // 5. No configuration: components exist but no config/env facts were extracted.
  const configFacts = facts.filter((f) => f.kind === 'config');
  if (components.length > 10 && configFacts.length === 0) {
    gaps.push({
      id: 'no-config-detected',
      kind: 'CONFIG_UNDOCUMENTED',
      description: `项目有 ${components.length} 个组件但未检测到配置项/环境变量,配置管理可能未文档化`,
      source: 'code scan',
    });
  }

  return gaps;
}
[]; + existing.push(fact); + byKind.set(fact.kind, existing); + } + + for (const [kind, kindFacts] of byKind) { + const lines = [ + '---', + `title: ${project} ${kind}`, + 'domain: code-knowledge', + `source:`, + ...Array.from(new Set(kindFacts.map((f) => f.file))).map((f) => ` - ${f}`), + '---', + '', + `# ${kind.charAt(0).toUpperCase() + kind.slice(1)}`, + '', + ]; + + for (const fact of kindFacts) { + lines.push(`- \`${fact.name}\` ← ${fact.file}:${fact.lineStart} [${fact.confidence}]`); + if (fact.detail) { + lines.push(` \`\`\`\n ${fact.detail.trim()}\n \`\`\``); + } + } + + pages.set(`${kind}.md`, lines.join('\n')); + } + + const relationFacts = facts.filter((f) => f.kind === 'relation'); + if (relationFacts.length > 0) { + const byDir = new Map(); + for (const fact of relationFacts) { + const seg = fact.file.split('/')[0] || '_root'; + const existing = byDir.get(seg) ?? []; + existing.push(fact); + byDir.set(seg, existing); + } + for (const [seg, segFacts] of byDir) { + const lines = [ + '---', + `title: ${project} relations (${seg})`, + 'domain: code-knowledge', + '---', + '', + `# Relations (${seg})`, + '', + ]; + for (const fact of segFacts) { + lines.push(`- \`${fact.name}\` ← ${fact.file}:${fact.lineStart}`); + } + pages.set(`relation-${seg}.md`, lines.join('\n')); + } + } + + // Interface Inventory page + if (interfaceInventory && interfaceInventory.entries.length > 0) { + const ifLines = [ + '---', + `title: ${project} interface inventory`, + 'domain: code-knowledge', + '---', + '', + '# Interface Inventory', + '', + '| Component | Type | Count | Confidence | Patterns |', + '|-----------|------|-------|------------|----------|', + ]; + for (const entry of interfaceInventory.entries) { + const patterns = entry.patterns.slice(0, 2).map(p => `\`${p.trim()}\``).join(', '); + ifLines.push(`| ${entry.component} | ${entry.type} | ${entry.count} | ${entry.confidence} | ${patterns} |`); + } + ifLines.push(''); + pages.set('interfaces.md', ifLines.join('\n')); 
+ } + + // Call Chains page + if (callChains && callChains.length > 0) { + const ccLines = [ + '---', + `title: ${project} call chains`, + 'domain: code-knowledge', + '---', + '', + '# Call Chains', + '', + `${callChains.length} call chain(s) traced from entry points (max depth 4).`, + '', + ]; + for (const chain of callChains.slice(0, 20)) { + ccLines.push(`## ${chain.entryPoint}`); + ccLines.push(''); + for (const step of chain.steps) { + const indent = step.layer === 'entry' ? '' : step.layer === 'orchestration' ? ' ' : step.layer === 'service' ? ' ' : ' '; + ccLines.push(`${indent}- [${step.layer}] \`${step.symbol}\` ← ${step.file}:${step.lineStart}`); + } + ccLines.push(''); + } + pages.set('call-chains.md', ccLines.join('\n')); + } + + const indexLines = [ + '---', + `title: ${project} code knowledge index`, + 'domain: code-knowledge', + '---', + '', + `# ${project}`, + '', + `Facts: ${facts.length} | Pages: ${pages.size}`, + '', + ]; + + // Interface summary in index + if (interfaceInventory && interfaceInventory.entries.length > 0) { + const byType: Record = {}; + for (const e of interfaceInventory.entries) { + byType[e.type] = (byType[e.type] ?? 
0) + e.count; + } + indexLines.push('## Interface Inventory'); + indexLines.push(''); + indexLines.push(`| Type | Count |`); + indexLines.push(`|------|-------|`); + for (const [type, count] of Object.entries(byType)) { + indexLines.push(`| ${type} | ${count} |`); + } + indexLines.push(''); + } + + indexLines.push('## Pages'); + indexLines.push(''); + for (const pageName of pages.keys()) { + indexLines.push(`- [${pageName}](./${pageName})`); + } + pages.set('index.md', indexLines.join('\n')); + + return pages; +} + +function buildModuleSummaries( + facts: CodeFact[], + graph: CodeGraphIndex, + project: string, +): Map { + const modules = new Map(); + + // 按顶层目录分组(排除 relation facts) + for (const fact of facts) { + if (fact.kind === 'relation') continue; + const parts = fact.file.split('/'); + const module = parts.length > 1 ? parts[0] : '_root'; + const existing = modules.get(module) ?? []; + existing.push(fact); + modules.set(module, existing); + } + + const summaries = new Map(); + + // 只为有 5+ 个 facts 的模块生成摘要 + for (const [module, moduleFacts] of modules) { + if (moduleFacts.length < 5) continue; + + // 统计该模块的引用次数(作为 edge target 的次数) + const fileRefs = new Map(); + for (const edge of graph.edges) { + if (edge.to.startsWith(module + '/') || edge.to === module) { + fileRefs.set(edge.to, (fileRefs.get(edge.to) ?? 0) + 1); + } + } + + // 按 kind 统计 + const kindCounts: Record = {}; + for (const f of moduleFacts) { + kindCounts[f.kind] = (kindCounts[f.kind] ?? 0) + 1; + } + + // 按引用次数排序,取 top 20 核心组件 + const ranked = moduleFacts + .filter(f => f.kind === 'component' || f.kind === 'interface') + .map(f => ({ ...f, refs: fileRefs.get(f.file) ?? 
0 })) + .sort((a, b) => b.refs - a.refs) + .slice(0, 20); + + // 该模块依赖的其他模块 + const depsTo = new Set(); + const depsFrom = new Set(); + for (const edge of graph.edges) { + if (edge.from.startsWith(module + '/')) { + const targetMod = edge.to.split('/')[0]; + if (targetMod !== module) depsTo.add(targetMod); + } + if (edge.to.startsWith(module + '/')) { + const sourceMod = edge.from.split('/')[0]; + if (sourceMod !== module) depsFrom.add(sourceMod); + } + } + + const lines = [ + '---', + `title: ${project} — ${module} module`, + 'domain: code-knowledge', + `source: [${module}/]`, + '---', + '', + `# ${module}`, + '', + `**${moduleFacts.length} facts** (${Object.entries(kindCounts).map(([k, v]) => `${k}: ${v}`).join(', ')})`, + '', + ]; + + if (depsTo.size > 0) { + lines.push(`**Depends on**: ${[...depsTo].join(', ')}`); + } + if (depsFrom.size > 0) { + lines.push(`**Depended by**: ${[...depsFrom].join(', ')}`); + } + if (depsTo.size > 0 || depsFrom.size > 0) lines.push(''); + + lines.push('## Core components'); + lines.push(''); + for (const item of ranked) { + const refStr = item.refs > 0 ? 
` (${item.refs} refs)` : ''; + lines.push(`- \`${item.name}\` ← ${item.file}:${item.lineStart}${refStr}`); + } + + if (moduleFacts.some(f => f.kind === 'config')) { + lines.push(''); + lines.push('## Config'); + lines.push(''); + for (const f of moduleFacts.filter(f => f.kind === 'config').slice(0, 10)) { + lines.push(`- \`${f.name}\` ← ${f.file}`); + } + } + + if (moduleFacts.some(f => f.kind === 'error')) { + lines.push(''); + lines.push('## Errors'); + lines.push(''); + for (const f of moduleFacts.filter(f => f.kind === 'error').slice(0, 10)) { + lines.push(`- \`${f.name}\` ← ${f.file}`); + } + } + + lines.push(''); + summaries.set(`${module}.md`, lines.join('\n')); + } + + return summaries; +} + +export async function extractCodebase(opts: ExtractCodebaseOptions): Promise { + const root = path.resolve(opts.path || '.'); + const project = opts.project || path.basename(root); + const maxFiles = opts.maxFiles || 200; + + const wikiRoot = path.join(root, 'teamwiki'); + const evidenceDir = path.join(wikiRoot, 'evidence', 'code', project); + const indicesDir = path.join(wikiRoot, '.indices'); + const manifestPath = path.join(wikiRoot, 'source-manifest.json'); + + let changedFiles: string[] | undefined; + if (opts.incremental) { + try { + const changes = await detectCodeIncrementalChanges(root, manifestPath, project); + if (changes.added.length === 0 && changes.changed.length === 0 && changes.deleted.length === 0) { + if (opts.json) { + console.log(JSON.stringify({ status: 'up-to-date', project })); + } else { + console.log(chalk.green(`[extract] ${project}: 无变更,跳过。`)); + } + return; + } + changedFiles = [...changes.added, ...changes.changed]; + if (!opts.json) { + console.log(chalk.dim(`[extract] 增量模式:${changedFiles.length} 文件变更`)); + } + } catch { + if (!opts.json) { + console.log(chalk.dim('[extract] 无历史 manifest,执行全量提取')); + } + } + } + + const { files } = await collectCode({ root, maxFiles, changedFiles }); + if (files.length === 0) { + if (opts.json) { + 
console.log(JSON.stringify({ status: 'no-files', project })); + } else { + console.log(chalk.yellow(`[extract] ${project}: 未发现可提取的源代码文件。`)); + } + return; + } + + const facts = extractCodeFacts(files); + const graph: CodeGraphIndex = buildCodeGraph(facts); + + // Interface detection (HTTP/MQ/RPC) + const interfaceInventory = await scanInterfaces(files); + + // Call chain tracing (entry → orchestration → service → data) + const callChains = traceCallChains(facts, files); + + const pages = buildEvidencePages(facts, project, interfaceInventory, callChains); + + await mkdir(evidenceDir, { recursive: true }); + await mkdir(indicesDir, { recursive: true }); + + for (const [filename, content] of pages) { + await writeFile(path.join(evidenceDir, filename), content, 'utf-8'); + } + + // Build architecture overlay (directory-level contains edges) + const pageSlugs = [...pages.keys()].map(p => `evidence/code/${project}/${p.replace('.md', '')}`); + const overlay = buildIndexHubOverlay(project, 'evidence/code', pageSlugs); + + // Merge overlay nodes/edges into CodeGraphIndex format + const overlayNodes = overlay.nodes + .filter(n => !graph.nodes.some(gn => gn.id === n.slug)) + .map(n => ({ id: n.slug, kind: 'component' as const, label: n.title, file: '' })); + const overlayEdges = overlay.edges + .map(e => ({ from: e.from, to: e.to, relation: 'mentions' as const })); + + const mergedGraph: CodeGraphIndex = { + schemaVersion: graph.schemaVersion ?? 
'team-wiki.graph-index.v1', + generatedAt: new Date().toISOString(), + nodes: [...graph.nodes, ...overlayNodes], + edges: [...graph.edges, ...overlayEdges], + }; + + await writeFile( + path.join(indicesDir, 'graph-index.json'), + JSON.stringify(mergedGraph, null, 2), + 'utf-8', + ); + + // 生成模块级摘要页(按顶层目录聚合) + const moduleSummaries = buildModuleSummaries(facts, graph, project); + if (moduleSummaries.size > 0) { + const modulesDir = path.join(evidenceDir, 'modules'); + await mkdir(modulesDir, { recursive: true }); + for (const [filename, content] of moduleSummaries) { + await writeFile(path.join(modulesDir, filename), content, 'utf-8'); + } + } + + // 生成 team-wiki 标准入口文件 + const proj = [{ slug: project, label: project }]; + await writeFile(path.join(wikiRoot, 'router.md'), routerTemplate(proj), 'utf-8'); + await writeFile(path.join(wikiRoot, 'hot.md'), HOT_TEMPLATE, 'utf-8'); + await writeFile(path.join(wikiRoot, 'index.md'), indexTemplate(proj), 'utf-8'); + + // 生成 gaps/ — 知识缺口追踪 + const gaps = detectKnowledgeGaps(facts, graph, files); + const gapsDir = path.join(wikiRoot, 'gaps'); + await mkdir(gapsDir, { recursive: true }); + const gapLines = [ + '---', + 'title: Knowledge Gaps', + `domain: ${project}`, + 'source: []', + '---', + '', + '# Knowledge Gaps', + '', + '在代码知识提取过程中发现的缺口。这些条目表示知识库尚未覆盖的领域,recall 命中 gap 时不应凭空回答。', + '', + '| ID | Kind | Status | Description | Source |', + '|----|------|--------|-------------|--------|', + ]; + for (const gap of gaps) { + gapLines.push(`| ${gap.id} | ${gap.kind} | open | ${gap.description} | ${gap.source} |`); + } + if (gaps.length === 0) { + gapLines.push('| — | — | — | 未发现明显知识缺口 | — |'); + } + gapLines.push(''); + await writeFile(path.join(gapsDir, 'detected.md'), gapLines.join('\n'), 'utf-8'); + + const manifest = { + version: 1, + lastScan: new Date().toISOString(), + files: files.map((f) => ({ + relativePath: f.relativePath, + sha256: f.sha256, + language: f.language, + })), + }; + await writeFile(manifestPath, 
JSON.stringify(manifest, null, 2), 'utf-8'); + + const byKind: Record = {}; + for (const fact of facts) { + byKind[fact.kind] = (byKind[fact.kind] ?? 0) + 1; + } + + const result: ExtractResult = { + project, + filesScanned: files.length, + facts: { total: facts.length, byKind }, + graph: { nodes: mergedGraph.nodes.length, edges: mergedGraph.edges.length }, + incremental: !!opts.incremental && !!changedFiles, + outputDir: wikiRoot, + }; + + if (opts.json) { + console.log(JSON.stringify(result, null, 2)); + } else { + console.log(chalk.green(`[extract] ${project} 完成`)); + console.log(` 文件: ${result.filesScanned}`); + console.log(` 事实: ${result.facts.total} (${Object.entries(byKind).map(([k, v]) => `${k}:${v}`).join(', ')})`); + console.log(` 图谱: ${result.graph.nodes} nodes, ${result.graph.edges} edges`); + if (interfaceInventory.entries.length > 0) { + const byType: Record = {}; + for (const e of interfaceInventory.entries) byType[e.type] = (byType[e.type] ?? 0) + e.count; + console.log(` 接口: ${Object.entries(byType).map(([t, c]) => `${t}:${c}`).join(', ')}`); + } + if (callChains.length > 0) { + console.log(` 调用链: ${callChains.length} chains (max depth ${Math.max(...callChains.map(c => c.depth))})`); + } + console.log(` 输出: ${wikiRoot}`); + } +} diff --git a/src/utils/hook-output.ts b/src/utils/hook-output.ts new file mode 100644 index 0000000..e30791a --- /dev/null +++ b/src/utils/hook-output.ts @@ -0,0 +1,27 @@ +/** + * Multi-tool-aware hook output formatting. + * + * Different AI tools parse Stop hook STDOUT differently: + * - Claude Code / CodeBuddy: hookSpecificOutput.additionalContext → visible to AI + * - Cursor: direct JSON message → shown in UI + * - Codex etc.: default hookSpecificOutput (maximum compatibility) + */ + +/** + * Format Stop hook output so the AI can see the hint content. + * + * @param message Hint text to pass to the AI + * @param tool Current AI tool identifier (claude / cursor / codebuddy / codex / etc.) 
+ * @returns JSON string to write to STDOUT + */ +export function formatStopHookOutput(message: string, tool: string): string { + if (tool === 'cursor') { + return JSON.stringify({ message }); + } + return JSON.stringify({ + hookSpecificOutput: { + hookEventName: 'Stop', + additionalContext: message, + }, + }); +} diff --git a/src/wiki-engine/adapters/index.ts b/src/wiki-engine/adapters/index.ts new file mode 100644 index 0000000..2d7a8e2 --- /dev/null +++ b/src/wiki-engine/adapters/index.ts @@ -0,0 +1,34 @@ +/** + * Team Wiki Engine — vendored from Team Wiki project by @lurkacai. + * Core concepts: code fact extraction, knowledge graph, evidence pages. + */ + +export { collectCode } from '../code-knowledge/code-collector.js'; +export type { CodeCollectedFile, CollectCodeOptions } from '../code-knowledge/code-collector.js'; + +export { extractCodeFacts } from '../code-knowledge/code-extractors.js'; +export type { CodeFact, CodeFactKind, CodeEvidenceType } from '../code-knowledge/code-extractors.js'; + +export { buildCodeGraph, buildCodeGraphIndex } from '../code-knowledge/code-graph.js'; +export type { CodeGraphIndex } from '../code-knowledge/code-graph.js'; + +export { detectCodeIncrementalChanges } from '../code-knowledge/code-incremental.js'; + +export { + mergeGraphs, + loadGraphIndex, + saveGraphIndex, + createGraphIndex, + findNeighbors, + findNeighborsNHop, + GRAPH_INDEX_SCHEMA_VERSION, +} from '../core/graph-index.schema.js'; +export type { GraphIndex, GraphNode, GraphEdge, RelationType } from '../core/graph-index.schema.js'; + +export { scanInterfaces } from '../interface-scanner.js'; +export type { InterfaceInventory, InterfaceInventoryEntry, InterfaceType } from '../interface-scanner.js'; + +export { traceCallChains } from '../call-chain-tracer.js'; +export type { CallChain, CallChainStep, CallChainLayer } from '../call-chain-tracer.js'; + +export { buildIndexHubOverlay } from '../code-graph-overlay.js'; diff --git a/src/wiki-engine/adapters/templates.ts 
b/src/wiki-engine/adapters/templates.ts new file mode 100644 index 0000000..35c35dd --- /dev/null +++ b/src/wiki-engine/adapters/templates.ts @@ -0,0 +1,33 @@ +export function routerTemplate(projects: Array<{ slug: string; label: string }>): string { + const links = projects.map(p => `- [[code/${p.slug}/index]] — ${p.label} 代码知识`).join('\n'); + return `# Team Wiki Router\n\nRoute broad questions to the relevant domain entrypoint.\n\n${links}\n`; +} + +export function indexTemplate(projects: Array<{ slug: string; label: string }>): string { + const domains = projects + .map(p => `- [${p.slug}](./evidence/code/${p.slug}/index.md) — 代码知识图谱`) + .join('\n'); + return [ + '# Team Wiki Index', + '', + `Last updated: ${new Date().toISOString()}`, + '', + '## Domains', + '', + domains, + '', + '## Navigation', + '', + '- [router.md](./router.md) — 领域路由入口', + '- [hot.md](./hot.md) — 活跃工作记忆', + '', + ].join('\n'); +} + +export const HOT_TEMPLATE = [ + '# Hot Context', + '', + 'Keep only active working memory here: current focus, recent decisions, open questions.', + 'Move durable conclusions into domain pages.', + '', +].join('\n'); From 5f1759f5fec9abb95b79ff6eace810d0166e38b2 Mon Sep 17 00:00:00 2001 From: jaelgeng Date: Fri, 26 Jun 2026 11:36:59 +0800 Subject: [PATCH 3/4] test: unit tests for wiki-engine modules + hook-output (39 tests) - interface-scanner: HTTP/MQ/RPC detection across languages (12 tests) - call-chain-tracer: entry detection, layer classification (8 tests) - code-graph-overlay: buildIndexHubOverlay node/edge generation (5 tests) - doc-graph-extractor: structure + entity extraction (8 tests) - hook-output: formatStopHookOutput multi-tool format (6 tests) All tests use in-memory data, no filesystem/network dependencies. 
--- src/__tests__/hook-output.test.ts | 42 ++++ src/__tests__/wiki-engine.test.ts | 346 ++++++++++++++++++++++++++++++ 2 files changed, 388 insertions(+) create mode 100644 src/__tests__/hook-output.test.ts create mode 100644 src/__tests__/wiki-engine.test.ts diff --git a/src/__tests__/hook-output.test.ts b/src/__tests__/hook-output.test.ts new file mode 100644 index 0000000..099c09d --- /dev/null +++ b/src/__tests__/hook-output.test.ts @@ -0,0 +1,42 @@ +import { describe, it, expect } from 'vitest'; +import { formatStopHookOutput } from '../utils/hook-output.js'; + +describe('formatStopHookOutput', () => { + it('claude: returns hookSpecificOutput format', () => { + const result = formatStopHookOutput('hello', 'claude'); + const parsed = JSON.parse(result); + expect(parsed.hookSpecificOutput.hookEventName).toBe('Stop'); + expect(parsed.hookSpecificOutput.additionalContext).toBe('hello'); + }); + + it('codebuddy: returns hookSpecificOutput format (same as claude)', () => { + const result = formatStopHookOutput('msg', 'codebuddy'); + const parsed = JSON.parse(result); + expect(parsed.hookSpecificOutput).toBeDefined(); + expect(parsed.hookSpecificOutput.additionalContext).toBe('msg'); + }); + + it('cursor: returns {message} format', () => { + const result = formatStopHookOutput('test', 'cursor'); + const parsed = JSON.parse(result); + expect(parsed.message).toBe('test'); + expect(parsed.hookSpecificOutput).toBeUndefined(); + }); + + it('unknown tool: defaults to hookSpecificOutput', () => { + const result = formatStopHookOutput('x', 'codex'); + const parsed = JSON.parse(result); + expect(parsed.hookSpecificOutput.additionalContext).toBe('x'); + }); + + it('returns valid JSON string', () => { + const result = formatStopHookOutput('any message', 'claude'); + expect(() => JSON.parse(result)).not.toThrow(); + }); + + it('empty message is preserved in output', () => { + const result = formatStopHookOutput('', 'claude'); + const parsed = JSON.parse(result); + 
expect(parsed.hookSpecificOutput.additionalContext).toBe(''); + }); +}); diff --git a/src/__tests__/wiki-engine.test.ts b/src/__tests__/wiki-engine.test.ts new file mode 100644 index 0000000..0572b8f --- /dev/null +++ b/src/__tests__/wiki-engine.test.ts @@ -0,0 +1,346 @@ +import { describe, it, expect } from 'vitest'; +import { scanInterfaces } from '../wiki-engine/interface-scanner.js'; +import { traceCallChains } from '../wiki-engine/call-chain-tracer.js'; +import { buildIndexHubOverlay } from '../wiki-engine/code-graph-overlay.js'; +import { extractDocStructure, extractDocEntities, wikiLinkToPageSlug, entitySlugFor } from '../wiki-engine/doc-graph-extractor.js'; +import type { CodeCollectedFile } from '../wiki-engine/code-knowledge/code-collector.js'; +import type { CodeFact } from '../wiki-engine/code-knowledge/code-extractors.js'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +const makeFile = (relativePath: string, content: string, language: string): CodeCollectedFile => ({ + path: `/repo/${relativePath}`, + relativePath, + content, + language, + sha256: 'mock-sha', +}); + +const makeFact = (name: string, kind: string, file: string, lineStart = 1): CodeFact => ({ + name, + kind: kind as CodeFact['kind'], + file, + lineStart, + lineEnd: lineStart + 5, + detail: '', + confidence: 'EXTRACTED' as const, + evidenceType: 'source' as CodeFact['evidenceType'], +}); + +// --------------------------------------------------------------------------- +// interface-scanner +// --------------------------------------------------------------------------- + +describe('scanInterfaces', () => { + it('returns HTTP entry for TypeScript router.get pattern', async () => { + const files = [makeFile('src/routes.ts', "router.get('/users', handler);", 'typescript')]; + const result = await scanInterfaces(files); + 
expect(result.entries.length).toBeGreaterThan(0); + const entry = result.entries[0]; + expect(entry.type).toBe('HTTP'); + }); + + it('returns HTTP with HIGH confidence for Python @app.route', async () => { + const files = [makeFile('api/app.py', "@app.route('/health')\ndef health(): pass", 'python')]; + const result = await scanInterfaces(files); + const entry = result.entries.find(e => e.type === 'HTTP'); + expect(entry).toBeDefined(); + expect(entry!.confidence).toBe('HIGH'); + }); + + it('returns RPC entry for Go grpc.NewServer pattern', async () => { + const files = [makeFile('server/grpc.go', 's := grpc.NewServer()', 'go')]; + const result = await scanInterfaces(files); + const entry = result.entries.find(e => e.type === 'RPC'); + expect(entry).toBeDefined(); + }); + + it('returns MQ entry for channel.consume pattern', async () => { + const files = [makeFile('worker/mq.ts', 'channel.consume(queue, handler);', 'typescript')]; + const result = await scanInterfaces(files); + const entry = result.entries.find(e => e.type === 'MQ'); + expect(entry).toBeDefined(); + // The generic .consume rule (MEDIUM) fires before the channel.consume rule (HIGH) + // because DETECTION_RULES applies the first matching rule per line. 
+ expect(['HIGH', 'MEDIUM']).toContain(entry!.confidence); + }); + + it('returns empty entries when no patterns match', async () => { + const files = [makeFile('utils/helper.ts', 'export const add = (a: number) => a + 1;', 'typescript')]; + const result = await scanInterfaces(files); + expect(result.entries).toHaveLength(0); + expect(result.scannedAt).toBeTruthy(); + }); + + it('groups files by top-level directory as component', async () => { + const files = [ + makeFile('api/handler.ts', "router.get('/a', fn);", 'typescript'), + makeFile('api/middleware.ts', "router.post('/b', fn);", 'typescript'), + ]; + const result = await scanInterfaces(files); + expect(result.entries[0].component).toBe('api'); + expect(result.entries[0].count).toBeGreaterThanOrEqual(2); + }); + + it('returns multiple pattern lines up to 5 in patterns array', async () => { + const routes = Array.from({ length: 7 }, (_, i) => `router.get('/r${i}', fn);`).join('\n'); + const files = [makeFile('routes/index.ts', routes, 'typescript')]; + const result = await scanInterfaces(files); + const entry = result.entries.find(e => e.type === 'HTTP'); + expect(entry!.patterns.length).toBeLessThanOrEqual(5); + }); +}); + +// --------------------------------------------------------------------------- +// call-chain-tracer +// --------------------------------------------------------------------------- + +describe('traceCallChains', () => { + it('returns a chain for a handler entry point fact', () => { + const facts: CodeFact[] = [ + makeFact('UserHandler', 'component', 'src/handler.ts'), + ]; + const files: CodeCollectedFile[] = [ + makeFile('src/handler.ts', 'export class UserHandler {}', 'typescript'), + ]; + const chains = traceCallChains(facts, files); + expect(chains.length).toBeGreaterThan(0); + expect(chains[0].steps[0].layer).toBe('entry'); + }); + + it('returns a chain with entry layer for route-named component', () => { + const facts: CodeFact[] = [ + makeFact('GET /api/users', 'interface', 
'src/routes.ts'), + ]; + const files: CodeCollectedFile[] = [ + makeFile('src/routes.ts', '', 'typescript'), + ]; + const chains = traceCallChains(facts, files); + expect(chains.length).toBeGreaterThan(0); + const firstStep = chains[0].steps[0]; + expect(firstStep.layer).toBe('entry'); + }); + + it('returns empty array when no entry points exist', () => { + const facts: CodeFact[] = [ + makeFact('calculateTotal', 'component', 'src/math.ts'), + ]; + const files: CodeCollectedFile[] = [ + makeFile('src/math.ts', 'export const calculateTotal = () => 0;', 'typescript'), + ]; + const chains = traceCallChains(facts, files); + expect(chains).toHaveLength(0); + }); + + it('depth does not exceed 4', () => { + // Create a chain of handler → relation → relation → ... + const facts: CodeFact[] = [ + makeFact('handleRequest', 'component', 'src/controller.ts'), + makeFact('./service', 'relation', 'src/controller.ts'), + makeFact('doWork', 'component', 'src/service.ts'), + makeFact('./repo', 'relation', 'src/service.ts'), + makeFact('findAll', 'component', 'src/repo.ts'), + makeFact('./db', 'relation', 'src/repo.ts'), + makeFact('query', 'component', 'src/db.ts'), + makeFact('./extra', 'relation', 'src/db.ts'), + makeFact('extra', 'component', 'src/extra.ts'), + ]; + const files: CodeCollectedFile[] = [ + makeFile('src/controller.ts', '', 'typescript'), + makeFile('src/service.ts', '', 'typescript'), + makeFile('src/repo.ts', '', 'typescript'), + makeFile('src/db.ts', '', 'typescript'), + makeFile('src/extra.ts', '', 'typescript'), + ]; + const chains = traceCallChains(facts, files); + for (const chain of chains) { + expect(chain.depth).toBeLessThanOrEqual(4); + } + }); + + it('picks up key file with handler-like path as entry', () => { + const facts: CodeFact[] = []; + const files: CodeCollectedFile[] = [ + { + path: '/repo/src/handler.ts', + relativePath: 'src/handler.ts', + content: '', + language: 'typescript', + sha256: 'x', + isKeyFile: true, + }, + ]; + const chains = 
traceCallChains(facts, files); + expect(chains.length).toBeGreaterThan(0); + }); +}); + +// --------------------------------------------------------------------------- +// code-graph-overlay +// --------------------------------------------------------------------------- + +describe('buildIndexHubOverlay', () => { + it('produces index node plus one component node per slug', () => { + const slugs = ['code/myproject/functions', 'code/myproject/types', 'code/myproject/errors']; + const result = buildIndexHubOverlay('myproject', 'code', slugs); + // 1 index node + 3 component nodes + expect(result.nodes).toHaveLength(4); + }); + + it('all edges have relation CONTAINS from index to each slug', () => { + const slugs = ['code/proj/a', 'code/proj/b']; + const result = buildIndexHubOverlay('proj', 'code', slugs); + expect(result.edges).toHaveLength(2); + for (const edge of result.edges) { + expect(edge.relation).toBe('CONTAINS'); + expect(slugs).toContain(edge.to); + } + }); + + it('empty slugs → returns only index node, no edges', () => { + const result = buildIndexHubOverlay('proj', 'code', []); + expect(result.nodes).toHaveLength(1); + expect(result.edges).toHaveLength(0); + expect(result.nodes[0].type).toBe('architecture'); + }); + + it('skips a slug equal to the index slug to avoid self-loops', () => { + const indexSlug = 'code/proj/index'; + const slugs = [indexSlug, 'code/proj/other']; + const result = buildIndexHubOverlay('proj', 'code', slugs); + // index node + 1 component node (self-slug skipped) + expect(result.nodes).toHaveLength(2); + expect(result.edges).toHaveLength(1); + expect(result.edges[0].to).toBe('code/proj/other'); + }); + + it('returns a valid GraphIndex with schemaVersion', () => { + const result = buildIndexHubOverlay('p', 'out', ['out/p/x']); + expect(result.schemaVersion).toBe('team-wiki.graph-index.v1'); + expect(result.generatedAt).toBeTruthy(); + }); +}); + +// --------------------------------------------------------------------------- +// 
doc-graph-extractor +// --------------------------------------------------------------------------- + +describe('extractDocStructure', () => { + it('creates a page node with given slug and title', () => { + const result = extractDocStructure('# Hello\n\nContent', 'docs/hello', 'docs/hello.md'); + const pageNode = result.nodes.find(n => n.slug === 'docs/hello'); + expect(pageNode).toBeDefined(); + expect(pageNode!.type).toBe('source'); + }); + + it('extracts h2/h3 headings as section nodes with CONTAINS edges', () => { + const content = '## Overview\n\nSome text\n\n### Details\n\nMore'; + const result = extractDocStructure(content, 'docs/page', 'docs/page.md'); + const sectionNodes = result.nodes.filter(n => n.slug.includes('#')); + expect(sectionNodes.length).toBe(2); + const containsEdges = result.edges.filter(e => e.relation === 'CONTAINS'); + expect(containsEdges.length).toBe(2); + }); + + it('extracts wiki links as REFERENCES edges', () => { + const content = 'See [[other-page]] for more.'; + const result = extractDocStructure(content, 'docs/page', 'docs/page.md'); + const refEdge = result.edges.find(e => e.relation === 'REFERENCES'); + expect(refEdge).toBeDefined(); + expect(refEdge!.from).toBe('docs/page'); + }); + + it('deduplicates wiki links pointing to the same target', () => { + const content = 'See [[shared]] and also [[shared]].'; + const result = extractDocStructure(content, 'docs/page', 'docs/page.md'); + const refEdges = result.edges.filter(e => e.relation === 'REFERENCES'); + expect(refEdges.length).toBe(1); + }); + + it('skips self-referencing wiki links', () => { + const content = '[[page]] self link'; + const result = extractDocStructure(content, 'page', 'page.md'); + const selfEdge = result.edges.find(e => e.to === 'page' && e.relation === 'REFERENCES'); + expect(selfEdge).toBeUndefined(); + }); + + it('respects pageCategory and domain options', () => { + const result = extractDocStructure('content', 'slug', 'file.md', { + pageCategory: 
'component', + domain: 'infra', + pageTitle: 'My Page', + }); + const pageNode = result.nodes[0]; + expect(pageNode.type).toBe('component'); + expect(pageNode.domain).toBe('infra'); + expect(pageNode.title).toBe('My Page'); + }); + + it('deduplicates duplicate heading slugs with numeric suffix', () => { + const content = '## Intro\n\ntext\n\n## Intro\n\nmore'; + const result = extractDocStructure(content, 'p', 'p.md'); + const sectionSlugs = result.nodes.filter(n => n.slug.includes('#')).map(n => n.slug); + expect(new Set(sectionSlugs).size).toBe(sectionSlugs.length); + expect(sectionSlugs.some(s => s.includes('-2'))).toBe(true); + }); +}); + +describe('extractDocEntities', () => { + it('extracts HTTP API endpoints as interface nodes', () => { + const content = 'Call GET /v1/users to list users.'; + const result = extractDocEntities(content, 'docs/api', 'docs/api.md'); + const apiNode = result.nodes.find(n => n.type === 'interface'); + expect(apiNode).toBeDefined(); + expect(apiNode!.slug).toContain('api:'); + }); + + it('extracts error codes', () => { + const content = 'Returns Err40001 on invalid input.'; + const result = extractDocEntities(content, 'docs/errors', 'docs/errors.md'); + const errNode = result.nodes.find(n => n.type === 'error'); + expect(errNode).toBeDefined(); + expect(errNode!.title).toBe('Err40001'); + }); + + it('extracts config keys from backtick constants', () => { + const content = 'Set `MAX_RETRY` to control retries.'; + const result = extractDocEntities(content, 'docs/config', 'docs/config.md'); + const cfgNode = result.nodes.find(n => n.type === 'config'); + expect(cfgNode).toBeDefined(); + }); + + it('deduplicates repeated API mentions — one node, one edge', () => { + const content = 'GET /v1/items and GET /v1/items again.'; + const result = extractDocEntities(content, 'docs/p', 'docs/p.md'); + const apiNodes = result.nodes.filter(n => n.type === 'interface'); + expect(apiNodes.length).toBe(1); + }); + + it('returns empty nodes for plain 
prose with no patterns', () => { + const content = 'Just some plain text without any special patterns.'; + const result = extractDocEntities(content, 'docs/plain', 'docs/plain.md'); + expect(result.nodes).toHaveLength(0); + }); +}); + +describe('wikiLinkToPageSlug', () => { + it('strips leading slashes and .md extension', () => { + expect(wikiLinkToPageSlug('/docs/guide.md')).toBe('guide'); + }); + + it('returns slugified last segment of a path link', () => { + expect(wikiLinkToPageSlug('folder/My Page')).toBe('my-page'); + }); +}); + +describe('entitySlugFor', () => { + it('returns doc-entity::', () => { + expect(entitySlugFor('api', 'GET /v1/users')).toBe('doc-entity:api:get-v1-users'); + }); + + it('handles empty anchor with unknown fallback', () => { + expect(entitySlugFor('config', '---')).toBe('doc-entity:config:unknown'); + }); +}); From ed425393042d7c9e857c0db72e8b4419f94e96cb Mon Sep 17 00:00:00 2001 From: m0Nst3r873 Date: Fri, 26 Jun 2026 19:42:02 +0800 Subject: [PATCH 4/4] fix: apply quality fixes B1-B22 + wire CLI --extract option Bug fixes applied: - B1: unify graph-index path to .indices/ (was .teamwiki/.indices/) - B2: fix router.md links (evidence/code/ prefix) - B3: add teamwiki to safeIgnore - B4: remove stale .teamwiki/evidence check - B5: use saveGraphIndex() instead of manual writeFile - B9: unify graph schema to GraphIndex (remove CodeGraphIndex) - B13: filter third-party npm imports from relation facts - B15: priority sort: key files first, then shallow dirs - B16: generate deterministic overview.md - B17: rename call-chains to dependency-paths (not runtime calls) - B18: Python extractor: only service-pattern functions as components - B19: facts deduplication by kind:name - B21: doc-graph config pattern restricted to SCREAMING_SNAKE_CASE - B22: API path pattern no longer requires /v\d*/ prefix CLI integration: - Add --extract, --incremental, --project, --max-files to codebase command - Add extract branch to codebase-cmd.ts - Add teamwiki/ to 
.gitignore --- .gitignore | 1 + src/codebase-cmd.ts | 19 +++ src/codebase-extract.ts | 150 ++++++++++++++---- src/index.ts | 6 +- src/wiki-engine/adapters/index.ts | 2 +- src/wiki-engine/adapters/templates.ts | 2 +- src/wiki-engine/call-chain-tracer.ts | 2 + .../code-knowledge/code-collector.ts | 15 +- .../code-knowledge/code-extractors.ts | 15 +- src/wiki-engine/code-knowledge/code-graph.ts | 66 +++++--- .../code-knowledge/extractors/python.ts | 8 +- .../code-knowledge/extractors/typescript.ts | 11 +- src/wiki-engine/core/graph-index.schema.ts | 21 ++- src/wiki-engine/core/wiki-protocol.ts | 5 +- src/wiki-engine/doc-graph-extractor.ts | 4 +- 15 files changed, 248 insertions(+), 79 deletions(-) diff --git a/.gitignore b/.gitignore index 644ed48..376dfe2 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ docs/codebase.md docs/llm-wiki.md roadmap_jael.md validation/ +teamwiki/ diff --git a/src/codebase-cmd.ts b/src/codebase-cmd.ts index 2633fa8..9b22318 100644 --- a/src/codebase-cmd.ts +++ b/src/codebase-cmd.ts @@ -13,11 +13,15 @@ import type { Severity, LintReport, FixResult } from './codebase-lint.js'; export interface CodebaseCmdOptions extends GlobalOptions { lint?: boolean; fix?: boolean; + extract?: boolean | string; + incremental?: boolean; severity?: Severity; staleDays?: string; pendingReviewThreshold?: string; json?: boolean; output?: string; + project?: string; + maxFiles?: string; } // ─── Helpers ───────────────────────────────────────────────────────────────── @@ -57,10 +61,25 @@ function hasHighIssues(report: LintReport): boolean { export async function codebaseCmd(opts: CodebaseCmdOptions): Promise { const cwd = process.cwd(); + if (opts.extract) { + const { extractCodebase } = await import('./codebase-extract.js'); + const extractPath = typeof opts.extract === 'string' ? opts.extract : cwd; + await extractCodebase({ + path: extractPath, + incremental: opts.incremental, + json: opts.json, + project: opts.project, + maxFiles: opts.maxFiles ? 
parseInt(opts.maxFiles, 10) : undefined, + }); + return; + } + if (!opts.lint) { console.log('teamai codebase — 团队 codebase 文档健康度管理'); console.log(''); console.log('用法:'); + console.log(' teamai codebase --extract [path] 提取代码知识 + 构建图谱'); + console.log(' teamai codebase --extract --incremental 增量模式'); console.log(' teamai codebase --lint 运行全局一致性检查'); console.log(' teamai codebase --lint --fix 检查并自动修复低风险问题'); console.log(' teamai codebase --lint --json 输出 JSON 报告(适合 CI)'); diff --git a/src/codebase-extract.ts b/src/codebase-extract.ts index c80926a..eb5bb57 100644 --- a/src/codebase-extract.ts +++ b/src/codebase-extract.ts @@ -21,8 +21,10 @@ import { buildIndexHubOverlay, mergeGraphs, createGraphIndex, + saveGraphIndex, } from './wiki-engine/adapters/index.js'; -import type { CodeFact, CodeGraphIndex, InterfaceInventory, CallChain } from './wiki-engine/adapters/index.js'; +import type { CodeFact, InterfaceInventory, CallChain } from './wiki-engine/adapters/index.js'; +import type { GraphIndex } from './wiki-engine/core/graph-index.schema.js'; import { routerTemplate, indexTemplate, HOT_TEMPLATE } from './wiki-engine/adapters/templates.js'; export interface ExtractCodebaseOptions { @@ -51,12 +53,12 @@ interface KnowledgeGap { function detectKnowledgeGaps( facts: CodeFact[], - graph: CodeGraphIndex, + graph: GraphIndex, files: Array<{ relativePath: string }>, ): KnowledgeGap[] { const gaps: KnowledgeGap[] = []; const scannedFiles = new Set(files.map((f) => f.relativePath)); - const nodeFiles = new Set(graph.nodes.map((n) => n.file)); + const nodeSlugs = new Set(graph.nodes.map((n) => n.slug)); const connectedNodes = new Set(); for (const edge of graph.edges) { connectedNodes.add(edge.from); @@ -109,7 +111,7 @@ function detectKnowledgeGaps( // 3. 
孤立组件:有节点但与图谱中其他节点无任何连接 const orphanNodes = graph.nodes.filter( - (n) => !connectedNodes.has(n.id) && !connectedNodes.has(n.file), + (n) => !connectedNodes.has(n.slug), ); if (orphanNodes.length > 5 && orphanNodes.length > graph.nodes.length * 0.3) { gaps.push({ @@ -231,17 +233,19 @@ function buildEvidencePages( pages.set('interfaces.md', ifLines.join('\n')); } - // Call Chains page + // Dependency Paths page if (callChains && callChains.length > 0) { const ccLines = [ '---', - `title: ${project} call chains`, + `title: ${project} dependency paths`, 'domain: code-knowledge', '---', '', - '# Call Chains', + '# Dependency Paths', '', - `${callChains.length} call chain(s) traced from entry points (max depth 4).`, + 'Static import dependency paths (not runtime call traces).', + '', + `${callChains.length} dependency path(s) traced from entry points (max depth 4).`, '', ]; for (const chain of callChains.slice(0, 20)) { @@ -253,7 +257,7 @@ function buildEvidencePages( } ccLines.push(''); } - pages.set('call-chains.md', ccLines.join('\n')); + pages.set('dependency-paths.md', ccLines.join('\n')); } const indexLines = [ @@ -296,7 +300,7 @@ function buildEvidencePages( function buildModuleSummaries( facts: CodeFact[], - graph: CodeGraphIndex, + graph: GraphIndex, project: string, ): Map { const modules = new Map(); @@ -405,6 +409,102 @@ function buildModuleSummaries( return summaries; } +/** + * Generate a deterministic overview.md from facts + graph (B16). + * Provides basic architecture context without AI calls. + */ +function buildOverview( + facts: CodeFact[], + graph: GraphIndex, + project: string, + interfaceInventory: InterfaceInventory, + callChains: CallChain[], +): string { + const modules = new Map(); + for (const fact of facts) { + if (fact.kind === 'relation') continue; + const mod = fact.file.split('/')[0] || '_root'; + const existing = modules.get(mod) ?? 
[]; + existing.push(fact); + modules.set(mod, existing); + } + + const lines = [ + '---', + `title: ${project} overview`, + 'domain: code-knowledge', + '---', + '', + `# ${project}`, + '', + `**${facts.length} facts** extracted from ${new Set(facts.map(f => f.file)).size} files.`, + `Graph: ${graph.nodes.length} nodes, ${graph.edges.length} edges.`, + '', + '## Module Structure', + '', + '| Module | Facts | Components | Interfaces |', + '|--------|-------|------------|------------|', + ]; + + const sortedModules = [...modules.entries()] + .filter(([, mf]) => mf.length >= 3) + .sort((a, b) => b[1].length - a[1].length); + + for (const [mod, mf] of sortedModules) { + const comps = mf.filter(f => f.kind === 'component').length; + const ifaces = mf.filter(f => f.kind === 'interface').length; + lines.push(`| ${mod} | ${mf.length} | ${comps} | ${ifaces} |`); + } + + // Module dependency direction + lines.push(''); + lines.push('## Dependencies'); + lines.push(''); + const depMap = new Map>(); + for (const edge of graph.edges) { + const fromMod = edge.from.split('/')[0] || '_root'; + const toMod = edge.to.split('/')[0] || '_root'; + if (fromMod !== toMod) { + const existing = depMap.get(fromMod) ?? new Set(); + existing.add(toMod); + depMap.set(fromMod, existing); + } + } + if (depMap.size > 0) { + for (const [mod, deps] of depMap) { + lines.push(`- **${mod}** → ${[...deps].join(', ')}`); + } + } else { + lines.push('(No cross-module dependencies detected)'); + } + + // Interface summary + if (interfaceInventory.entries.length > 0) { + lines.push(''); + lines.push('## Interfaces'); + lines.push(''); + const byType: Record = {}; + for (const e of interfaceInventory.entries) { + byType[e.type] = (byType[e.type] ?? 
0) + e.count; + } + lines.push(`Types: ${Object.entries(byType).map(([t, c]) => `${t}(${c})`).join(', ')}`); + } + + // Dependency paths summary + if (callChains.length > 0) { + lines.push(''); + lines.push('## Key Dependency Paths'); + lines.push(''); + for (const chain of callChains.slice(0, 5)) { + const path = chain.steps.map(s => s.symbol).join(' → '); + lines.push(`- ${chain.entryPoint}: ${path}`); + } + } + + lines.push(''); + return lines.join('\n'); +} + export async function extractCodebase(opts: ExtractCodebaseOptions): Promise { const root = path.resolve(opts.path || '.'); const project = opts.project || path.basename(root); @@ -412,7 +512,6 @@ export async function extractCodebase(opts: ExtractCodebaseOptions): Promise `evidence/code/${project}/${p.replace('.md', '')}`); const overlay = buildIndexHubOverlay(project, 'evidence/code', pageSlugs); - // Merge overlay nodes/edges into CodeGraphIndex format - const overlayNodes = overlay.nodes - .filter(n => !graph.nodes.some(gn => gn.id === n.slug)) - .map(n => ({ id: n.slug, kind: 'component' as const, label: n.title, file: '' })); - const overlayEdges = overlay.edges - .map(e => ({ from: e.from, to: e.to, relation: 'mentions' as const })); - - const mergedGraph: CodeGraphIndex = { - schemaVersion: graph.schemaVersion ?? 
'team-wiki.graph-index.v1', - generatedAt: new Date().toISOString(), - nodes: [...graph.nodes, ...overlayNodes], - edges: [...graph.edges, ...overlayEdges], - }; + // Merge overlay into the unified GraphIndex + const mergedGraph = mergeGraphs(graph, overlay); - await writeFile( - path.join(indicesDir, 'graph-index.json'), - JSON.stringify(mergedGraph, null, 2), - 'utf-8', - ); + // Write graph-index.json using protocol function (B5) + await saveGraphIndex(wikiRoot, mergedGraph); // 生成模块级摘要页(按顶层目录聚合) const moduleSummaries = buildModuleSummaries(facts, graph, project); @@ -500,6 +584,10 @@ export async function extractCodebase(opts: ExtractCodebaseOptions): Promise', 'Project slug for extract output (default: directory name)')) + .addOption(new Option('--max-files ', 'Max source files to scan (default: 200)')) .option('--lint', 'Run global consistency lint over docs/team-codebase') .option('--fix', 'Apply low-risk mechanical fixes (only with --lint)') .option('--severity ', 'Minimum severity to report: high|medium|low|info', 'info') diff --git a/src/wiki-engine/adapters/index.ts b/src/wiki-engine/adapters/index.ts index 2d7a8e2..b116b60 100644 --- a/src/wiki-engine/adapters/index.ts +++ b/src/wiki-engine/adapters/index.ts @@ -10,7 +10,7 @@ export { extractCodeFacts } from '../code-knowledge/code-extractors.js'; export type { CodeFact, CodeFactKind, CodeEvidenceType } from '../code-knowledge/code-extractors.js'; export { buildCodeGraph, buildCodeGraphIndex } from '../code-knowledge/code-graph.js'; -export type { CodeGraphIndex } from '../code-knowledge/code-graph.js'; +export type { CodeGraphIndex } from '../code-knowledge/code-graph.js'; // deprecated alias for GraphIndex export { detectCodeIncrementalChanges } from '../code-knowledge/code-incremental.js'; diff --git a/src/wiki-engine/adapters/templates.ts b/src/wiki-engine/adapters/templates.ts index 35c35dd..a1b367b 100644 --- a/src/wiki-engine/adapters/templates.ts +++ b/src/wiki-engine/adapters/templates.ts @@ 
-1,5 +1,5 @@ export function routerTemplate(projects: Array<{ slug: string; label: string }>): string { - const links = projects.map(p => `- [[code/${p.slug}/index]] — ${p.label} 代码知识`).join('\n'); + const links = projects.map(p => `- [[evidence/code/${p.slug}/index]] — ${p.label} 代码知识`).join('\n'); return `# Team Wiki Router\n\nRoute broad questions to the relevant domain entrypoint.\n\n${links}\n`; } diff --git a/src/wiki-engine/call-chain-tracer.ts b/src/wiki-engine/call-chain-tracer.ts index 6d3da50..04e5b9f 100644 --- a/src/wiki-engine/call-chain-tracer.ts +++ b/src/wiki-engine/call-chain-tracer.ts @@ -66,6 +66,8 @@ function classifyLayer(filePath: string, symbol: string): CallChainLayer { * Trace call chains from entry points through the codebase. * Simplified version of codebase-mind's 3-layer penetration analysis. * + * Note: traces import/dependency edges, not runtime call sites. Output represents static dependency paths. + * * 1. Find entry points (handlers, routes, main functions) * 2. For each entry point, trace through relations (imports/calls) * 3. Classify each step by layer (entry -> orchestration -> service -> data) diff --git a/src/wiki-engine/code-knowledge/code-collector.ts b/src/wiki-engine/code-knowledge/code-collector.ts index 754a020..110fc31 100644 --- a/src/wiki-engine/code-knowledge/code-collector.ts +++ b/src/wiki-engine/code-knowledge/code-collector.ts @@ -52,7 +52,20 @@ export async function collectCode(options: CollectCodeOptions): Promise<{ manife const filePaths: string[] = []; await walk(root, filePaths, options.includeTests ?? false); - let filtered = filePaths.sort(); + // Sort: key files first, then by directory depth (shallow first) + let filtered = filePaths.sort((a, b) => { + const relA = toPosix(path.relative(root, a)); + const relB = toPosix(path.relative(root, b)); + const langA = languageFor(a); + const langB = languageFor(b); + const keyA = isKeyFile(relA, langA) ? 0 : 1; + const keyB = isKeyFile(relB, langB) ? 
0 : 1; + if (keyA !== keyB) return keyA - keyB; + const depthA = relA.split('/').length; + const depthB = relB.split('/').length; + if (depthA !== depthB) return depthA - depthB; + return relA.localeCompare(relB); + }); // Filter to only changed files if specified if (options.changedFiles && options.changedFiles.length > 0) { diff --git a/src/wiki-engine/code-knowledge/code-extractors.ts b/src/wiki-engine/code-knowledge/code-extractors.ts index c37dd41..4d16ee8 100644 --- a/src/wiki-engine/code-knowledge/code-extractors.ts +++ b/src/wiki-engine/code-knowledge/code-extractors.ts @@ -42,11 +42,20 @@ export interface CodeFact { */ export function extractCodeFacts(files: CodeCollectedFile[]): CodeFact[] { const byLanguage = groupByLanguage(files); - const facts: CodeFact[] = []; + const allFacts: CodeFact[] = []; for (const [language, langFiles] of byLanguage) { - facts.push(...extractForLanguage(language, langFiles)); + allFacts.push(...extractForLanguage(language, langFiles)); } - return dedupe(facts); + // Deduplicate facts by kind:name (keep first occurrence) + const seen = new Set(); + const deduped = allFacts.filter(f => { + if (f.kind === 'relation') return true; // relations are always unique by file context + const key = `${f.kind}:${f.name}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); + return deduped; } function groupByLanguage(files: CodeCollectedFile[]): Map { diff --git a/src/wiki-engine/code-knowledge/code-graph.ts b/src/wiki-engine/code-knowledge/code-graph.ts index 953905b..49e2c7b 100644 --- a/src/wiki-engine/code-knowledge/code-graph.ts +++ b/src/wiki-engine/code-knowledge/code-graph.ts @@ -9,40 +9,70 @@ import { createGraphIndex, addNode, addEdge, + saveGraphIndex, GRAPH_INDEX_SCHEMA_VERSION, } from "../core/graph-index.schema.js"; -export interface CodeGraphIndex { - schemaVersion: "team-wiki.code-graph.v1"; - generatedAt: string; - nodes: Array<{ id: string; kind: CodeFact["kind"]; label: string; file: string }>; - 
edges: Array<{ from: string; to: string; relation: "imports" | "mentions" }>; -} +/** + * @deprecated Use GraphIndex directly. Kept for backward compatibility during migration. + */ +export type CodeGraphIndex = GraphIndex; -export async function writeCodeGraph(wikiRoot: string, project: string, facts: CodeFact[]): Promise<{ graph: CodeGraphIndex; path: string }> { +export async function writeCodeGraph(wikiRoot: string, project: string, facts: CodeFact[]): Promise<{ graph: GraphIndex; path: string }> { const graph = buildCodeGraph(facts); - const graphPath = path.join(wikiRoot, "graph", `${project}-graph-index.json`); - await mkdir(path.dirname(graphPath), { recursive: true }); - await writeFile(graphPath, `${JSON.stringify(graph, null, 2)}\n`, "utf8"); + const graphPath = await saveGraphIndex(wikiRoot, graph); return { graph, path: graphPath }; } -export function buildCodeGraph(facts: CodeFact[]): CodeGraphIndex { - const nodes = facts +/** + * Build a GraphIndex from raw code facts. + * Nodes: one per unique component/interface/config/error fact. + * Edges: DEPENDS_ON edges from relation facts (internal imports only). + */ +export function buildCodeGraph(facts: CodeFact[]): GraphIndex { + const nodes: GraphNode[] = facts .filter((fact) => fact.kind !== "relation") - .map((fact) => ({ id: `${fact.kind}:${fact.name}:${fact.file}`, kind: fact.kind, label: fact.name, file: fact.file })); - const nodeFiles = new Set(nodes.map((node) => node.file)); - const edges = facts + .map((fact) => ({ + slug: `${fact.kind}/${fact.name}`, + type: mapFactKindToCategory(fact.kind), + confidence: fact.confidence === "EXTRACTED" ? 
"EXTRACTED" as const : "INFERRED" as const, + title: fact.name, + domain: path.dirname(fact.file).split('/')[0] || undefined, + })); + + const nodeFiles = new Set(facts.filter(f => f.kind !== "relation").map(f => f.file)); + const edges: GraphEdge[] = facts .filter((fact) => fact.kind === "relation") - .flatMap((fact) => [...nodeFiles].filter((file) => relationMayTarget(fact.name, file)).map((file) => ({ from: fact.file, to: file, relation: "imports" as const }))); - return { schemaVersion: "team-wiki.code-graph.v1", generatedAt: new Date().toISOString(), nodes, edges }; + .flatMap((fact) => { + const targets = [...nodeFiles].filter((file) => relationMayTarget(fact.name, file)); + return targets.map((file) => ({ + from: fact.file, + to: file, + relation: "DEPENDS_ON" as const, + weight: 0.8, + source: "code-heuristic" as const, + })); + }); + + return createGraphIndex(nodes, edges); } function relationMayTarget(importTarget: string, file: string): boolean { - const normalized = importTarget.replace(/^\.\//u, "").replace(/\.(ts|tsx|js|jsx)$/u, ""); + const normalized = importTarget.replace(/^\.\//u, "").replace(/\.\.\//g, "").replace(/\.(ts|tsx|js|jsx)$/u, ""); + if (normalized.length < 3) return false; // Skip very short matches to reduce false positives return file.includes(normalized); } +function mapFactKindToCategory(kind: string): "component" | "interface" | "config" | "error" { + switch (kind) { + case "component": return "component"; + case "interface": return "interface"; + case "config": return "config"; + case "error": return "error"; + default: return "component"; + } +} + // ─── Unified Graph Compiler: build a full GraphIndex from component-level data ── export interface CodeComponent { diff --git a/src/wiki-engine/code-knowledge/extractors/python.ts b/src/wiki-engine/code-knowledge/extractors/python.ts index 3397372..bfc8125 100644 --- a/src/wiki-engine/code-knowledge/extractors/python.ts +++ b/src/wiki-engine/code-knowledge/extractors/python.ts @@ 
-21,10 +21,14 @@ export function extractPython(files: CodeCollectedFile[]): CodeFact[] { facts.push(makeFact("component", classDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); } - // Module-level function (not indented) + // Module-level function: only promote to component if it matches service patterns const funcDecl = /^(?:async\s+)?def\s+([a-z_][a-z0-9_]*)\s*\(/u.exec(line); if (funcDecl) { - facts.push(makeFact("component", funcDecl[1], file.relativePath, lineNumber, line, "EXTRACTED")); + const name = funcDecl[1]; + const isServiceFunc = /(?:handler|service|controller|command|worker|task|process|execute|dispatch|route)/i.test(name); + if (isServiceFunc) { + facts.push(makeFact("component", name, file.relativePath, lineNumber, line, "EXTRACTED")); + } } // --- Interfaces --- diff --git a/src/wiki-engine/code-knowledge/extractors/typescript.ts b/src/wiki-engine/code-knowledge/extractors/typescript.ts index 7c3c566..3c08af7 100644 --- a/src/wiki-engine/code-knowledge/extractors/typescript.ts +++ b/src/wiki-engine/code-knowledge/extractors/typescript.ts @@ -74,14 +74,14 @@ export function extractTypescript(files: CodeCollectedFile[]): CodeFact[] { facts.push(makeFact("error", errorConst[1], file.relativePath, lineNumber, line, "INFERRED")); } - // --- Relations --- + // --- Relations (only internal/relative imports, skip third-party packages) --- const importFrom = /^import\s+.*?from\s+["']([^"']+)["']/u.exec(line); - if (importFrom) { + if (importFrom && isProjectRelativeImport(importFrom[1])) { facts.push(makeFact("relation", importFrom[1], file.relativePath, lineNumber, line, "EXTRACTED")); } const dynamicImport = /(?:await\s+)?import\s*\(\s*["']([^"']+)["']\s*\)/u.exec(line); - if (dynamicImport && !importFrom) { + if (dynamicImport && !importFrom && isProjectRelativeImport(dynamicImport[1])) { facts.push(makeFact("relation", dynamicImport[1], file.relativePath, lineNumber, line, "INFERRED")); } } @@ -90,6 +90,11 @@ export function 
extractTypescript(files: CodeCollectedFile[]): CodeFact[] { return facts; } +/** Only keep project-relative imports (starts with . or /) — skip npm packages */ +function isProjectRelativeImport(target: string): boolean { + return target.startsWith('.') || target.startsWith('/'); +} + function makeFact( kind: CodeFactKind, name: string, diff --git a/src/wiki-engine/core/graph-index.schema.ts b/src/wiki-engine/core/graph-index.schema.ts index b6ec260..c15ba38 100644 --- a/src/wiki-engine/core/graph-index.schema.ts +++ b/src/wiki-engine/core/graph-index.schema.ts @@ -345,28 +345,25 @@ export function computeGraphHealth(graph: GraphIndex): GraphHealthMetrics { /** * Load graph-index.json from the wiki's indices directory. + * Canonical path: wikiRoot/.indices/graph-index.json * Returns null if the file doesn't exist. */ export async function loadGraphIndex(wikiRoot: string): Promise { - const paths = [ - path.join(wikiRoot, ".teamwiki", ".indices", "graph-index.json"), - path.join(wikiRoot, ".indices", "graph-index.json"), - path.join(wikiRoot, "graph", "graph-index.json"), - ]; - for (const p of paths) { - try { - const raw = await readFile(p, "utf8"); - return JSON.parse(raw) as GraphIndex; - } catch { /* continue */ } + const graphPath = path.join(wikiRoot, ".indices", "graph-index.json"); + try { + const raw = await readFile(graphPath, "utf8"); + return JSON.parse(raw) as GraphIndex; + } catch { + return null; } - return null; } /** * Save graph-index.json to the wiki's indices directory. 
+ * Canonical path: wikiRoot/.indices/graph-index.json */ export async function saveGraphIndex(wikiRoot: string, graph: GraphIndex): Promise { - const dir = path.join(wikiRoot, ".teamwiki", ".indices"); + const dir = path.join(wikiRoot, ".indices"); await mkdir(dir, { recursive: true }); const outPath = path.join(dir, "graph-index.json"); await writeFile(outPath, JSON.stringify(graph, null, 2), "utf8"); diff --git a/src/wiki-engine/core/wiki-protocol.ts b/src/wiki-engine/core/wiki-protocol.ts index 3e446a0..d75c723 100644 --- a/src/wiki-engine/core/wiki-protocol.ts +++ b/src/wiki-engine/core/wiki-protocol.ts @@ -137,6 +137,7 @@ export const WIKI_CATEGORIES: WikiCategory[] = [ const SAFE_IGNORE_SEGMENTS = new Set([ ".git", ".teamwiki", + "teamwiki", "node_modules", "dist", "build", @@ -151,10 +152,6 @@ const SENSITIVE_FILE_NAMES = new Set(["credentials.json"]); export function safeIgnore(filePath: string): boolean { const normalized = toPosix(filePath); - // Compiled code evidence pages live under .teamwiki/evidence/ and must be writable. 
- if (normalized.startsWith(".teamwiki/evidence/")) { - return false; - } const parts = normalized.split("/").filter(Boolean); if (parts.some((part) => SAFE_IGNORE_SEGMENTS.has(part))) { return true; diff --git a/src/wiki-engine/doc-graph-extractor.ts b/src/wiki-engine/doc-graph-extractor.ts index 7e2bf06..7b9cf04 100644 --- a/src/wiki-engine/doc-graph-extractor.ts +++ b/src/wiki-engine/doc-graph-extractor.ts @@ -111,7 +111,7 @@ export function extractDocEntities( const edges: GraphEdge[] = []; const seenEntitySlugs = new Set(); - const apiPattern = /(GET|POST|PUT|DELETE|PATCH)\s+(\/v?\d*\/[a-z0-9/_\-{}:.]+)/gi; + const apiPattern = /(GET|POST|PUT|DELETE|PATCH)\s+(\/[a-z0-9/_\-{}:.]+)/gi; let match: RegExpExecArray | null; while ((match = apiPattern.exec(content)) !== null) { const method = match[1].toUpperCase(); @@ -138,7 +138,7 @@ export function extractDocEntities( addEntity(entitySlugFor("config", key.toLowerCase()), "config", key, match.index); } - const configAssignPattern = /(?:^|\n)\s*([a-z][a-z0-9_.-]{2,})\s*[:=]\s*/gim; + const configAssignPattern = /^\s*([A-Z][A-Z0-9_]{2,})\s*[:=]\s*/gm; while ((match = configAssignPattern.exec(content)) !== null) { const key = match[1]; if (/^(http|https|get|post|put|delete|patch)$/i.test(key)) {