From abbb45da2f10eb59834cdec7862afd34cd38d096 Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 23 Mar 2026 23:00:52 +0000 Subject: [PATCH 1/4] feat: add Scout batch territory qualification pipeline Three-tier funnel: header scan (no LLM), quick Gemini qualification, and full Lighthouse analysis for top prospects. Includes HTML sanitisation for LLM safety, SSE streaming API, and Scout UI. 57 unit tests covering sanitise, tier1, tier2, and pipeline. Co-Authored-By: Claude Opus 4.6 --- app/api/scout/route.ts | 84 +++++++ app/layout.tsx | 5 + app/page.tsx | 3 +- app/scout/page.tsx | 165 +++++++++++++ components/scout-input.tsx | 77 ++++++ components/scout-progress.tsx | 40 +++ components/scout-results-table.tsx | 185 ++++++++++++++ lib/__tests__/sanitise.test.ts | 172 +++++++++++++ lib/gemini/detect-tech-stack.ts | 3 +- lib/gemini/qualify-prospect.ts | 3 +- lib/sanitise.ts | 100 ++++++++ lib/scout/__tests__/pipeline.test.ts | 354 +++++++++++++++++++++++++++ lib/scout/__tests__/tier1.test.ts | 351 ++++++++++++++++++++++++++ lib/scout/__tests__/tier2.test.ts | 196 +++++++++++++++ lib/scout/pipeline.ts | 286 ++++++++++++++++++++++ lib/scout/tier1.ts | 224 +++++++++++++++++ lib/scout/tier2-schema.ts | 121 +++++++++ lib/scout/tier2.ts | 105 ++++++++ lib/scout/types.ts | 75 ++++++ 19 files changed, 2546 insertions(+), 3 deletions(-) create mode 100644 app/api/scout/route.ts create mode 100644 app/scout/page.tsx create mode 100644 components/scout-input.tsx create mode 100644 components/scout-progress.tsx create mode 100644 components/scout-results-table.tsx create mode 100644 lib/__tests__/sanitise.test.ts create mode 100644 lib/sanitise.ts create mode 100644 lib/scout/__tests__/pipeline.test.ts create mode 100644 lib/scout/__tests__/tier1.test.ts create mode 100644 lib/scout/__tests__/tier2.test.ts create mode 100644 lib/scout/pipeline.ts create mode 100644 lib/scout/tier1.ts create mode 100644 lib/scout/tier2-schema.ts create mode 100644 lib/scout/tier2.ts create mode 100644 
// app/api/scout/route.ts
import { NextRequest, NextResponse } from "next/server"
import { isValidPublicUrl } from "@/lib/utils"
import { runScout } from "@/lib/scout/pipeline"

// Next.js route segment config: allow the streaming scan to run for up to 300 seconds.
export const maxDuration = 300

/** Upper bound on the number of URLs accepted in a single request. */
const MAX_URLS_PER_SCAN = 50

/** Value used for `tier3_limit` when the client omits it (or sends null). */
const DEFAULT_TIER3_LIMIT = 5

/** Builds the 400 response shape used for every validation failure in this route. */
function badRequest(error: string) {
  return NextResponse.json({ error }, { status: 400 })
}

/** True when `value` is absent (null/undefined) or a real boolean. */
function isOptionalBoolean(
  value: unknown
): value is boolean | null | undefined {
  return value == null || typeof value === "boolean"
}

/** True when `value` is absent (null/undefined) or a non-negative integer. */
function isOptionalCount(value: unknown): value is number | null | undefined {
  return (
    value == null ||
    (typeof value === "number" && Number.isInteger(value) && value >= 0)
  )
}

/**
 * POST /api/scout
 *
 * Validates the request body and streams the events yielded by `runScout`
 * back to the client as Server-Sent Events (`data: <json>\n\n` frames).
 *
 * Request body (JSON object):
 * - `urls`        non-empty array of at most 50 non-blank strings (required)
 * - `tier3_limit` non-negative integer, defaults to 5
 * - `skip_vercel` boolean, defaults to true
 * - `skip_tier3`  boolean, defaults to false
 *
 * @returns a 400 JSON response `{ error }` when validation fails, otherwise a
 *          `text/event-stream` response. If the pipeline throws, a final
 *          `{ stage: "error", data: { message } }` event is emitted before
 *          the stream is closed.
 */
export async function POST(req: NextRequest) {
  let body: unknown
  try {
    body = await req.json()
  } catch {
    return badRequest("Invalid JSON body")
  }

  // A syntactically valid body can still be `null` or a primitive; destructuring
  // that would throw and surface as a 500 instead of a client error.
  if (typeof body !== "object" || body === null) {
    return badRequest("Request body must be a JSON object")
  }

  const { urls, tier3_limit, skip_vercel, skip_tier3 } = body as Record<
    string,
    unknown
  >

  if (!Array.isArray(urls) || urls.length === 0) {
    return badRequest("urls must be a non-empty array of strings")
  }

  if (urls.length > MAX_URLS_PER_SCAN) {
    return badRequest(`Maximum ${MAX_URLS_PER_SCAN} URLs per scan`)
  }

  // Validate each URL is a non-blank string.
  // NOTE(review): `isValidPublicUrl` is imported but not applied here. Its
  // contract is not visible from this file — confirm that the pipeline rejects
  // private/internal hosts before the server fetches user-supplied URLs.
  const targets: string[] = []
  for (const url of urls as unknown[]) {
    if (typeof url !== "string" || url.trim().length === 0) {
      return badRequest(`Invalid URL in list: ${String(url)}`)
    }
    targets.push(url)
  }

  if (!isOptionalCount(tier3_limit)) {
    return badRequest("tier3_limit must be a non-negative integer")
  }
  if (!isOptionalBoolean(skip_vercel)) {
    return badRequest("skip_vercel must be a boolean")
  }
  if (!isOptionalBoolean(skip_tier3)) {
    return badRequest("skip_tier3 must be a boolean")
  }

  const options = {
    tier3_limit: tier3_limit ?? DEFAULT_TIER3_LIMIT,
    skip_vercel: skip_vercel ?? true,
    skip_tier3: skip_tier3 ?? false,
  }

  const encoder = new TextEncoder()
  // Set when the consumer goes away; enqueue()/close() throw on a cancelled
  // stream, so every controller call below is gated on this flag.
  let cancelled = false

  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      const send = (event: unknown): void => {
        controller.enqueue(
          encoder.encode(`data: ${JSON.stringify(event)}\n\n`)
        )
      }

      // Deliberately not awaited: start() returns immediately and events are
      // pushed as the pipeline produces them.
      void (async () => {
        try {
          for await (const event of runScout(targets, options)) {
            // Leaving the loop lets `for await` finalise the iterator, so the
            // pipeline stops at its next yield once the client disconnects.
            if (cancelled) break
            send(event)
          }
        } catch (error) {
          if (!cancelled) {
            try {
              send({
                stage: "error",
                data: {
                  message:
                    error instanceof Error
                      ? error.message
                      : "An unknown error occurred",
                },
              })
            } catch {
              // The stream was torn down while reporting; nobody is listening.
            }
          }
        } finally {
          if (!cancelled) {
            try {
              controller.close()
            } catch {
              // Already closed or errored — nothing left to release.
            }
          }
        }
      })()
    },
    cancel() {
      cancelled = true
    },
  })

  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    },
  })
}
diff --git a/app/page.tsx b/app/page.tsx index 6436b69..866192c 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -6,6 +6,7 @@ import { UrlInput } from '@/components/url-input' import { ProspectCard } from '@/components/prospect-card' interface ProspectNode { + id?: string title: string body: string metadata?: Record @@ -86,7 +87,7 @@ export default function HomePage() { ) : (
{sorted.map((prospect, idx) => ( - + ))}
)} diff --git a/app/scout/page.tsx b/app/scout/page.tsx new file mode 100644 index 0000000..6806e1e --- /dev/null +++ b/app/scout/page.tsx @@ -0,0 +1,165 @@ +"use client" + +import { useState, useCallback, useRef } from "react" +import { ScoutInput } from "@/components/scout-input" +import { ScoutProgress } from "@/components/scout-progress" +import { ScoutResultsTable } from "@/components/scout-results-table" +import type { Tier1Result, Tier2Result } from "@/lib/scout/types" + +export default function ScoutPage() { + const [running, setRunning] = useState(false) + const [tier1Results, setTier1Results] = useState([]) + const [tier2Results, setTier2Results] = useState([]) + const [tier3Domains, setTier3Domains] = useState([]) + const [inputCount, setInputCount] = useState(0) + const [tier2Expected, setTier2Expected] = useState(0) + const [tier3Expected, setTier3Expected] = useState(0) + const [error, setError] = useState(null) + + const abortRef = useRef(null) + + const handleStart = useCallback( + async ( + urls: string[], + options: { tier3Limit: number; skipVercel: boolean } + ) => { + setRunning(true) + setTier1Results([]) + setTier2Results([]) + setTier3Domains([]) + setInputCount(urls.length) + setTier2Expected(0) + setTier3Expected(options.tier3Limit) + setError(null) + + const controller = new AbortController() + abortRef.current = controller + + try { + const res = await fetch("/api/scout", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + urls, + tier3_limit: options.tier3Limit, + skip_vercel: options.skipVercel, + skip_tier3: options.tier3Limit === 0, + }), + signal: controller.signal, + }) + + if (!res.ok) { + const data = await res.json().catch(() => ({})) + setError(data.error ?? 
`Request failed: ${res.status}`) + setRunning(false) + return + } + + const reader = res.body?.getReader() + if (!reader) { + setError("No response body") + setRunning(false) + return + } + + const decoder = new TextDecoder() + let buffer = "" + + while (true) { + const { done, value } = await reader.read() + if (done) break + + buffer += decoder.decode(value, { stream: true }) + + const lines = buffer.split("\n") + buffer = lines.pop() ?? "" + + for (const line of lines) { + if (!line.startsWith("data: ")) continue + const json = line.slice(6).trim() + if (!json) continue + + try { + const event = JSON.parse(json) + + if (event.stage === "tier1") { + const t1 = event.data as Tier1Result + setTier1Results((prev) => [...prev, t1]) + if (t1.verdict !== "skip") { + setTier2Expected((prev) => prev + 1) + } + } else if (event.stage === "tier2") { + setTier2Results((prev) => [...prev, event.data as Tier2Result]) + } else if (event.stage === "tier3") { + const msg = (event.data as { message: string }).message + if (msg.startsWith("Full analysis complete:")) { + const domain = msg.replace("Full analysis complete: ", "") + setTier3Domains((prev) => [...prev, domain]) + } + } else if (event.stage === "complete") { + // Done + } else if (event.stage === "error") { + console.warn("Scout error event:", event.data) + } + } catch { + // Ignore malformed events + } + } + } + } catch (err) { + if ((err as Error).name !== "AbortError") { + setError((err as Error).message ?? "Unknown error") + } + } finally { + setRunning(false) + abortRef.current = null + } + }, + [] + ) + + return ( +
+
+

Scout

+

+ Batch territory qualification. Paste URLs, get a ranked prospect list. +

+
+ + + + {running && ( + + )} + + {error && ( +
+ {error} +
+ )} + + + + {!running && tier2Results.length === 0 && tier1Results.length === 0 && !error && ( +
+

+ Paste a list of company URLs to qualify them for Vercel. +

+

+ Scout scans headers, qualifies via AI, and runs full analysis on the + top prospects. +

+
+ )} +
+ ) +} diff --git a/components/scout-input.tsx b/components/scout-input.tsx new file mode 100644 index 0000000..d0b5bf3 --- /dev/null +++ b/components/scout-input.tsx @@ -0,0 +1,77 @@ +"use client" + +import { useState } from "react" +import { Button } from "@/components/ui/button" + +interface ScoutInputProps { + onStart: (urls: string[], options: { tier3Limit: number; skipVercel: boolean }) => void + disabled: boolean +} + +export function ScoutInput({ onStart, disabled }: ScoutInputProps) { + const [text, setText] = useState("") + const [tier3Limit, setTier3Limit] = useState(5) + const [includeVercel, setIncludeVercel] = useState(false) + + const urls = text + .split("\n") + .map((l) => l.trim()) + .filter((l) => l.length > 0) + + const count = urls.length + + return ( +
+