From 1c6b1e46ffcad37905c438c1d5c338d9278d5ddc Mon Sep 17 00:00:00 2001 From: oshrizak <63424207+oshrizak@users.noreply.github.com> Date: Tue, 26 May 2026 10:44:12 -0700 Subject: [PATCH] feat(pipeline): dispatch suggested library specialists during extraction and merge their output --- src/pipeline/contribute.ts | 5 +- src/pipeline/extraction.ts | 120 ++++++++++++++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 5 deletions(-) diff --git a/src/pipeline/contribute.ts b/src/pipeline/contribute.ts index 298359b..ce8abdf 100644 --- a/src/pipeline/contribute.ts +++ b/src/pipeline/contribute.ts @@ -3,8 +3,9 @@ import { loadImage, type PipelineContext } from "./context.ts"; import { ACCESSIBILITY_REQUIREMENTS } from "./accessibility.ts"; import { createAgentIssue } from "../github/issue.ts"; -// Content types already covered by the standard library — never suggest these. -const STANDARD = new Set([ +// Content types already covered by the standard library — never suggest these, +// and never dispatch the generic page extraction to them (see extraction.ts). +export const STANDARD = new Set([ "paragraph", "heading", "list", "table", "formField", "image", "quote", "caption", "footnote", ]); diff --git a/src/pipeline/extraction.ts b/src/pipeline/extraction.ts index 54067f8..887c8d9 100644 --- a/src/pipeline/extraction.ts +++ b/src/pipeline/extraction.ts @@ -5,6 +5,7 @@ import { loadAgent, type AgentSpec } from "../agents/loader.ts"; import { feedbackPreamble, loadImage, type InputImage, type PipelineContext } from "./context.ts"; import { ACCESSIBILITY_REQUIREMENTS } from "./accessibility.ts"; import { verifyAgentOutput } from "./feedback.ts"; +import { STANDARD as STANDARD_AGENTS } from "./contribute.ts"; import type { Fragment } from "./fragment.ts"; const PAGE_AGENT = "page"; @@ -131,11 +132,109 @@ async function correctPage( return corrected || null; } +// Merge instruction for splicing a specialist fragment into the page output. 
+const MERGE_SYSTEM = `You merge a higher-fidelity HTML fragment, produced by a specialist agent, into an
+existing accessible HTML page. Replace the page's weaker representation of that SAME content
+with the specialist fragment and change nothing else — keep all other content, order,
+headings, and structure exactly, and never leave both representations (no duplication).
+Output body content only (no <html>/<head>/<body> wrapper).
+Respond with ONLY this JSON: { "html": "<merged body HTML>" }`;
+
+/**
+ * Runs a library specialist agent against the whole page image and asks it to
+ * extract only the content its contract covers.
+ *
+ * The system prompt is the agent's own content followed by
+ * ACCESSIBILITY_REQUIREMENTS. The page image is always attached to the call;
+ * the routing capability is "vision" when the agent declares it and "text"
+ * otherwise. The raw reply is logged under the "extraction" phase.
+ *
+ * @param ctx - Pipeline context; supplies the model router and the run log.
+ * @param agent - Specialist spec; `content` is its prompt, `capabilities` picks the routing.
+ * @param img - Page image to extract from; its `name` is quoted in the prompt and the log.
+ * @returns The trimmed HTML fragment, or `null` when `extractJson` yields nothing,
+ *   the reply sets `no_content`, or the `html` field is missing or blank.
+ */
+async function runSpecialist(ctx: PipelineContext, agent: AgentSpec, img: InputImage): Promise<string | null> {
+  const system = `${agent.content}\n\n${ACCESSIBILITY_REQUIREMENTS}`;
+  // The example uses a placeholder rather than an empty string: a blank "html"
+  // is treated below as "nothing found", so the example must not invite one.
+  const user =
+    `Extract ONLY the content your contract covers from this page image (filename: ${img.name}). ` +
+    `If none is present, return {"no_content": true}. Otherwise respond with ONLY this JSON: ` +
+    `{ "no_content": false, "html": "<fragment HTML>" }`;
+  const capability = agent.capabilities.includes("vision") ? "vision" : "text";
+  const res = await ctx.router.complete(
+    agent.name,
+    capability,
+    [
+      { role: "system", content: system },
+      { role: "user", content: user },
+    ],
+    { images: [loadImage(img)] },
+  );
+  ctx.log.agentCall({ agent, phase: "extraction", image: img.name, output: res.text });
+  const parsed = extractJson<{ no_content?: boolean; html?: string }>(res.text);
+  // Trim once; a whitespace-only fragment counts as no content.
+  const html = parsed?.html?.trim();
+  if (!parsed || parsed.no_content || !html) return null;
+  return html;
+}
+
+// Splice a specialist fragment into the page body, replacing the page's own
+// (weaker) representation of that content. Returns the merged body, or null on
+// failure (caller keeps the original page output).
+/**
+ * @param ctx - Pipeline context; supplies the model router and the run log.
+ * @param img - Source page image; only its `name` is used, to tag the log entry.
+ *   The image itself is not sent — the merge is a text-only call.
+ * @param pageHtml - Current body HTML of the page.
+ * @param specialistName - Specialist agent name, shown in the prompt heading.
+ * @param reason - Description of the flagged content; "flagged" is used when empty.
+ * @param fragment - HTML fragment produced by the specialist.
+ * @returns The trimmed merged body HTML, or `null` when the reply yields no usable `html`.
+ */
+async function mergeSpecialist(
+  ctx: PipelineContext,
+  img: InputImage,
+  pageHtml: string,
+  specialistName: string,
+  reason: string,
+  fragment: string,
+): Promise<string | null> {
+  // Page and fragment are each presented under their own heading in an html code fence.
+  const user =
+    `## Current page (body HTML)\n\`\`\`html\n${pageHtml}\n\`\`\`\n\n` +
+    `## Specialist (${specialistName}) fragment for the ${reason || "flagged"} content on this page\n` +
+    `\`\`\`html\n${fragment}\n\`\`\`\n\n` +
+    `Replace the page's existing representation of that content with this specialist fragment; ` +
+    `keep everything else unchanged.`;
+  const res = await ctx.router.complete(PAGE_AGENT, "text", [
+    { role: "system", content: MERGE_SYSTEM },
+    { role: "user", content: user },
+  ]);
+  // The merge is routed as the page agent but driven by MERGE_SYSTEM, so the log
+  // entry carries a synthetic agent record holding the prompt that was actually sent.
+  const mergeAgent = { name: PAGE_AGENT, file: "page.md", content: MERGE_SYSTEM, capabilities: ["text"], sha: null, sessionBuilt: false };
+  ctx.log.agentCall({
+    agent: mergeAgent,
+    phase: "extraction",
+    image: img.name,
+    output: res.text,
+  });
+  const parsed = extractJson<{ html?: string }>(res.text);
+  // A missing, empty or whitespace-only html field all collapse to null.
+  return parsed?.html?.trim() || null;
+}
+
+// If a page flagged a content type that an EXISTING library agent handles, run
+// that specialist on the page and merge its higher-fidelity fragment into the
+// page output. Non-blocking: any failure leaves the page output unchanged.
+// dispatched=true means a library specialist ran (so the suggestion is already
+// covered and should not be re-filed as a new-agent issue).
+/**
+ * @param ctx - Pipeline context; supplies agent directories, session id, router and log.
+ * @param img - Page image handed to the specialist.
+ * @param pageHtml - Current body HTML of the page; returned untouched on every fallback path.
+ * @param suggestion - Content type flagged by the page; `name` may carry a `.md` suffix,
+ *   `reason` is forwarded to the merge prompt.
+ * @returns `html` is the merged body when the merge produced one, otherwise `pageHtml`.
+ *   `dispatched` is `false` when the name is a standard content type or `loadAgent`
+ *   finds no agent; it is `true` whenever an agent was found — including when it
+ *   reported no content or the run threw.
+ */
+async function dispatchSpecialist(
+  ctx: PipelineContext,
+  img: InputImage,
+  pageHtml: string,
+  suggestion: { name: string; reason: string },
+): Promise<{ html: string; dispatched: boolean }> {
+  // Suggestions may name the agent file; strip the extension to get the logical name.
+  const logical = suggestion.name.replace(/\.md$/, "");
+  if (STANDARD_AGENTS.has(logical)) return { html: pageHtml, dispatched: false };
+  const specialist = loadAgent(logical, {
+    agentsDir: ctx.paths.agentsDir,
+    tmpAgentsDir: ctx.paths.tmpAgentsDir(ctx.sessionId),
+  });
+  if (!specialist) return { html: pageHtml, dispatched: false };
+  try {
+    const fragment = await runSpecialist(ctx, specialist, img);
+    if (!fragment) {
+      ctx.log.event("specialist_no_content", { agent: specialist.file, image: img.name });
+      return { html: pageHtml, dispatched: true };
+    }
+    const merged = await mergeSpecialist(ctx, img, pageHtml, specialist.name, suggestion.reason, fragment);
+    ctx.log.event("specialist_dispatched", { agent: specialist.file, image: img.name, merged: Boolean(merged) });
+    // A failed merge (null) keeps the original page output.
+    return { html: merged ?? pageHtml, dispatched: true };
+  } catch (e) {
+    // Anything can be thrown, not only Error instances; narrow before reading
+    // `.message` so the log never records `undefined` as the error.
+    const message = e instanceof Error ? e.message : String(e);
+    ctx.log.event("specialist_dispatch_failed", { agent: specialist.file, image: img.name, error: message });
+    return { html: pageHtml, dispatched: true };
+  }
+}
+
 // One fragment per page, in submitted order. Each page is verified for source
 // fidelity at build time (PRD §7.5/§7.12); a page that fails gets one self-
 // correction pass. Verification is non-blocking — a run never fails because the
-// Feedback Agent is unavailable or unsure. Pages may also flag a content type that
-// warrants a specialist agent, collected as `suggestions` for the contribution step.
+// Feedback Agent is unavailable or unsure. When a page flags a content type that an
+// existing library agent handles, that specialist is dispatched and merged in;
+// otherwise the suggestion is collected for the contribution step.
export async function runExtraction(ctx: PipelineContext): Promise { const pageAgent = loadPageAgent(ctx); const fragments: Fragment[] = []; @@ -145,6 +244,19 @@ export async function runExtraction(ctx: PipelineContext): Promise