/**
 * Resolve whatever id the phone routed with onto a real broker conversation.
 *
 * Resolution order:
 *  1. `rawId` is itself a conversation key (a `c.…` id from the activity feed,
 *     or a `dm.…` direct id).
 *  2. `rawId` is a bare agent id (from the Agents tab) that has an operator DM
 *     stored under `dm.operator.{rawId}`.
 *  3. `rawId` is a bare agent id with no operator DM — many agents only have
 *     ask/consult conversations keyed `c.…` — so fall back to the conversation
 *     that lists it in `participantIds` and carries the most recent message.
 *
 * @param snapshot - Broker snapshot; only `conversations` and `messages` are read.
 * @param rawId - Conversation id or bare agent id, exactly as routed by the phone.
 * @returns The resolved conversation, or `null` when nothing matches.
 */
function resolveMobileConversation(
  snapshot: ScoutBrokerSnapshot,
  rawId: string,
): ScoutBrokerConversationRecord | null {
  const direct = snapshot.conversations[rawId];
  if (direct) return direct;

  const operatorDm = snapshot.conversations[`dm.operator.${rawId}`];
  if (operatorDm) return operatorDm;

  const participating = Object.values(snapshot.conversations).filter(
    (conversation) => conversation.participantIds?.includes(rawId),
  );
  if (participating.length === 0) return null;

  // Index the newest message timestamp per candidate conversation in a single
  // pass over the messages, instead of rescanning every message each time two
  // candidates are compared.
  const candidateIds = new Set(participating.map((conversation) => conversation.id));
  const lastActivityMs = new Map<string, number>();
  for (const message of Object.values(snapshot.messages)) {
    if (!candidateIds.has(message.conversationId)) continue;
    const createdAtMs = normalizeTimestampMs(message.createdAt) ?? 0;
    // Conversations with no (or unparseable) timestamps rank as 0, the floor.
    if (createdAtMs > (lastActivityMs.get(message.conversationId) ?? 0)) {
      lastActivityMs.set(message.conversationId, createdAtMs);
    }
  }

  // Linear max scan. The strict `>` keeps the first candidate on ties, which is
  // what a stable descending sort followed by `[0]` would have selected.
  let newest: ScoutBrokerConversationRecord | null = null;
  let newestMs = -1;
  for (const conversation of participating) {
    const activityMs = lastActivityMs.get(conversation.id) ?? 0;
    if (activityMs > newestMs) {
      newest = conversation;
      newestMs = activityMs;
    }
  }
  return newest;
}
/**
 * Recent activity rows for the mobile Home surface.
 *
 * Home is an orientation surface, so it reads the broker's *curated* home
 * activity rather than the raw `/v1/activity` lifecycle firehose
 * (ask_opened / flight_updated / …), which is an ops feed and stays on the
 * Tail tab. See project_home_purpose.
 *
 * NOTE(review): only `filters.limit` is honoured here. `agentId`, `actorId`
 * and `conversationId` are accepted for signature compatibility but are not
 * applied to the curated feed — confirm no caller still relies on them.
 *
 * @param filters - Optional filters; `limit` defaults to 100.
 * @returns At most `limit` rows from the head of the curated feed, or an empty
 *   array when the broker home (or its `activity`) is unavailable.
 */
export async function getScoutMobileActivity(
  filters: ScoutMobileActivityFilters = {},
): Promise<ScoutBrokerHomeActivityRecord[]> {
  const home = await readScoutBrokerHome();
  const activity = home?.activity ?? [];
  // `slice(0, n)` with a negative `n` trims from the tail instead of returning
  // nothing, so clamp a bad limit to zero rather than leak an odd-sized page.
  const limit = Math.max(0, filters.limit ?? 100);
  return activity.slice(0, limit);
}
The name + harness + age is + // the identity here; we don't repeat the project (that's PROJECT mode's + // job) — the second line only appears when the agent is on a branch. ForEach(Array(recents.enumerated()), id: \.element.id) { idx, agent in AgentRow( agent: agent, connector: nil, - showProject: true, + showProject: false, onTap: { tapAgent(agent) } ) if idx < recents.count - 1 { rowDivider } @@ -334,7 +335,8 @@ private struct AgentRow: View { let agent: AgentSummary /// Non-nil ⇒ a leaf under a multi-agent project (tree rail + indent). let connector: Connector? - /// Recent/flat mode prepends the project to the session line for context. + /// When set, prepends the project to the session line — only useful where no + /// header carries it. Recent mode leaves this off (name + age is enough). var showProject: Bool = false let onTap: () -> Void @@ -385,8 +387,8 @@ private struct AgentRow: View { /// The session coordinate beneath the name: the working branch when the agent /// is on one (recency is already shown as the age on the right — no point - /// repeating the idle "Available" status). Recent mode prefixes the project so - /// a flat row is self-describing. + /// repeating the idle "Available" status). With `showProject`, the project is + /// prefixed for rows that have no header to carry it. private var sessionLine: String? { let branch = agent.branch.flatMap { $0.isEmpty ? nil : $0 } let parts = [showProject ? 
/// Clears a thread's unread badge when it is opened.
///
/// The local row is zeroed first so the list already looks caught up when the
/// operator pops back; the broker is then asked to advance the operator's read
/// cursor. The broker write is best-effort: its error is discarded, so a failed
/// write only means the badge can come back on the next list pull.
private func markRead(_ conversationId: String) async {
    let row = conversations.firstIndex { $0.id == conversationId }
    if let row = row, conversations[row].unreadCount != 0 {
        // Skip the write when already zero.
        conversations[row].unreadCount = 0
    }
    _ = try? await client.markConversationRead(conversationId: conversationId)
}
+ private var showsTypeGlyph: Bool { + switch conversation.kind { + case .direct, .unknown: return false + default: return true + } + } + var body: some View { Button(action: onTap) { VStack(spacing: 0) { HStack(spacing: HudSpacing.md) { - // Left: conversation TYPE — a hand-drawn glyph, the list's rhythm. - CommsTypeGlyph(kind: conversation.kind) - .foregroundStyle(HudPalette.muted) + // Left: conversation TYPE — a hand-drawn glyph, the list's + // rhythm — but only when it actually marks something. + if showsTypeGlyph { + CommsTypeGlyph(kind: conversation.kind) + .foregroundStyle(HudPalette.muted) + } Text(displayTitle) .font(HudFont.ui(HudTextSize.md, weight: unread ? .semibold : .medium)) @@ -476,6 +507,9 @@ struct CommsThreadView: View { let client: any ScoutBrokerClient let conversation: CommsConversation let onClose: () -> Void + /// Called once the thread is on screen so the list can clear the unread badge + /// and the broker can advance the operator's read cursor. Defaults to a no-op. + var onRead: () async -> Void = {} @State private var messages: [CommsMessage] = [] @State private var isLoading = true @@ -497,7 +531,7 @@ struct CommsThreadView: View { // optional, not the only way out of a thread. 
.background(InteractivePopGestureEnabler()) .safeAreaInset(edge: .bottom) { composer } - .task { await load() } + .task { await load(); await onRead() } .onAppear { voice.prepare() } .onDisappear { if voice.isListening { voice.cancel() } } } diff --git a/apps/ios/ScoutNext/ConversationSurface.swift b/apps/ios/ScoutNext/ConversationSurface.swift index addd79d9..fbc40e01 100644 --- a/apps/ios/ScoutNext/ConversationSurface.swift +++ b/apps/ios/ScoutNext/ConversationSurface.swift @@ -19,6 +19,7 @@ struct ConversationSurface: View { @State private var projection = ConversationProjection() @State private var isStreaming = false + @State private var loadPhase: LoadPhase = .loading @State private var composerText = "" @State private var isSending = false @State private var showSettings = false @@ -28,6 +29,11 @@ struct ConversationSurface: View { private var turns: [TurnState] { projection.state?.turns ?? [] } + /// Distinguishes the three reasons a transcript can be empty so the surface + /// never renders an unexplained void: still fetching, loaded-but-no-history, + /// or the snapshot RPC failed. + private enum LoadPhase { case loading, loaded, failed } + var body: some View { VStack(spacing: 0) { header @@ -241,7 +247,50 @@ struct ConversationSurface: View { // MARK: - Transcript + @ViewBuilder private var transcript: some View { + if turns.isEmpty { + emptyState + } else { + transcriptScroll + } + } + + /// Shown when there's nothing to render — explains *why* rather than leaving a + /// black void: a card-created or never-run agent legitimately has no history, + /// which reads as "no messages yet" + the composer below; a failed fetch reads + /// as an error you can retry. 
+ @ViewBuilder + private var emptyState: some View { + VStack { + Spacer(minLength: 0) + switch loadPhase { + case .loading: + HudEmptyState(title: "Loading conversation", icon: "ellipsis.bubble") + case .failed: + VStack(spacing: HudSpacing.lg) { + HudEmptyState( + title: "Couldn’t load conversation", + subtitle: "The bridge didn’t return a transcript for this session.", + icon: "exclamationmark.bubble" + ) + HudButton("Retry", icon: "arrow.clockwise", style: .secondary) { + Task { await run() } + } + } + case .loaded: + HudEmptyState( + title: "No messages yet", + subtitle: "Steer the agent below to begin.", + icon: "bubble.left.and.bubble.right" + ) + } + Spacer(minLength: 0) + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + } + + private var transcriptScroll: some View { GeometryReader { geo in ScrollViewReader { proxy in ScrollView { @@ -277,12 +326,19 @@ struct ConversationSurface: View { // MARK: - Lifecycle private func run() async { + loadPhase = .loading // Recover authoritative state, then fold live events on top — exactly // the snapshot-then-stream contract the projection is built around. - if let snapshot = try? await client.snapshot(conversationId: conversationId) { + do { + let snapshot = try await client.snapshot(conversationId: conversationId) var p = ConversationProjection() p.applySnapshot(snapshot) projection = p + loadPhase = .loaded + } catch { + // No authoritative snapshot. Surface the failure, but still attach to + // the live stream so a session that's actively producing can populate. + loadPhase = .failed } // Live events flip the badge on only when they actually arrive — a // static (already-settled) conversation stays "idle". 
/// Folds a freshly fetched page of the curated activity feed into `activity`.
///
/// Events are deduplicated by `id` (an event already present is kept as-is),
/// ordered newest-first by `tsMs`, and capped at the 24 most recent. An empty
/// page is a no-op, so a failed or empty fetch never clears what is on screen.
/// Home refreshes this on appear / pull-to-refresh only; the live process
/// firehose belongs to the Tail tab and is not folded in here.
private func mergeActivity(_ incoming: [TailEvent]) {
    if incoming.isEmpty { return }

    var knownIds = Set(activity.map(\.id))
    var combined = activity
    // `insert` reports whether the id was new, which doubles as the dedupe test
    // for both already-shown events and repeats inside `incoming` itself.
    for event in incoming where knownIds.insert(event.id).inserted {
        combined.append(event)
    }

    combined.sort { $0.tsMs > $1.tsMs }

    let overflow = combined.count - 24
    if overflow > 0 {
        combined.removeLast(overflow)
    }
    activity = combined
}
= nil var body: some View { + Button { onTap?() } label: { rowContent } + .buttonStyle(.plain) + .disabled(onTap == nil) + } + + private var rowContent: some View { HStack(alignment: .top, spacing: HudSpacing.md) { HudStatusDot(color: kindColor, size: 6, pulses: false) .padding(.top, 5) @@ -450,10 +476,17 @@ private struct ActivityRow: View { .font(HudFont.mono(HudTextSize.micro)) .foregroundStyle(HudPalette.muted) } + if onTap != nil { + Image(systemName: "chevron.right") + .font(HudFont.ui(HudTextSize.xs, weight: .semibold)) + .foregroundStyle(HudPalette.dim) + .padding(.top, 4) + } } .padding(.horizontal, HudSpacing.xl) .padding(.vertical, HudSpacing.md) .frame(maxWidth: .infinity, alignment: .leading) + .contentShape(Rectangle()) } private var metaLine: String { @@ -585,12 +618,17 @@ private struct AgentFleetRow: View { .font(HudFont.ui(HudTextSize.md, weight: .semibold)) .foregroundStyle(HudPalette.ink) .lineLimit(1) + .layoutPriority(1) if let locator = locator { Text(locator) .font(HudFont.mono(HudTextSize.xs)) - .foregroundStyle(HudPalette.ink) + // Subordinate to the name: the mono locator was reading at full + // ink and fighting the sans title. Muted lets the name lead and + // the runtime/project sit as a quiet tag beside it. + .foregroundStyle(HudPalette.muted) .lineLimit(1) .truncationMode(.tail) + .layoutPriority(0) } Spacer(minLength: HudSpacing.md) if showsStateBadge { diff --git a/apps/ios/ScoutNext/NewSessionSurface.swift b/apps/ios/ScoutNext/NewSessionSurface.swift index 294817d7..7df977ab 100644 --- a/apps/ios/ScoutNext/NewSessionSurface.swift +++ b/apps/ios/ScoutNext/NewSessionSurface.swift @@ -5,13 +5,25 @@ import ScoutCapabilities /// New Session — a composer that builds a project-modality /// `SessionInitiationSpec` (target.projectPath set, execution.session = .new, /// seed.instructions) and dispatches it through the broker client, then shows -/// the returned ids. 
Token usage cribbed from -/// `apps/macos/Sources/Scout/ScoutSessionService.swift`. +/// the returned ids. The reading order is the operator's: pick the **project**, +/// write the **instructions**, then confirm/adjust the **agent** (harness, +/// model, target) — which leads with a sensible default and stays calm and +/// value-like until you engage it. struct NewSessionSurface: View { let client: any ScoutBrokerClient + /// Friendly name of the Mac the live bridge is connected to, shown as the + /// read-only "Target" — the session lands on the machine we're paired with. + /// nil when unconnected. A live target *picker* (choosing among paired + /// machines) waits on multi-machine routing; today the bridge is one link. + var targetMachineName: String? = nil @State private var projectPath: String = "/Users/arach/dev/openscout" @State private var instructions: String = "Stand up the ScoutNext shell and get it running in the simulator." + /// Selected harness id (the spec's `execution.harness`) and curated model id. + /// Model is scoped to the harness, so changing harness resets it to Default. + @State private var harnessId: String = HarnessOption.catalog[0].id + @State private var modelId: String = ModelOption.defaultId + @State private var showProjectPicker = false @State private var isSubmitting = false @State private var result: SessionInitiationResult? @State private var errorText: String? @@ -24,11 +36,56 @@ struct NewSessionSurface: View { let title: String } + /// A curated harness + its hand-picked model menu. Until the bridge exposes + /// the live `harness-catalog` (with per-machine readiness), this is the two + /// featured workspace harnesses with a short, valid model list each — the + /// strings are passed verbatim to the CLI as `--model`. + private struct HarnessOption: Identifiable, Hashable { + let id: String // spec `execution.harness`, e.g. "claude" + let label: String // menu label, e.g. 
"Claude Code" + let models: [ModelOption] + + static let catalog: [HarnessOption] = [ + HarnessOption(id: "claude", label: "Claude Code", models: [ + .defaultOption, + ModelOption(id: "opus", label: "Opus", value: "opus"), + ModelOption(id: "sonnet", label: "Sonnet", value: "sonnet"), + ModelOption(id: "haiku", label: "Haiku", value: "haiku"), + ]), + HarnessOption(id: "codex", label: "Codex", models: [ + .defaultOption, + ModelOption(id: "gpt-5-codex", label: "GPT-5 Codex", value: "gpt-5-codex"), + ModelOption(id: "gpt-5", label: "GPT-5", value: "gpt-5"), + ModelOption(id: "gpt-5-pro", label: "GPT-5 Pro", value: "gpt-5-pro"), + ]), + ] + } + + /// One model menu entry. `value` is the `--model` string, or nil for + /// "Default" — which omits the field so the harness picks its own default. + private struct ModelOption: Identifiable, Hashable { + let id: String + let label: String + let value: String? + + static let defaultId = "default" + static let defaultOption = ModelOption(id: defaultId, label: "Default", value: nil) + } + + private var selectedHarness: HarnessOption { + HarnessOption.catalog.first { $0.id == harnessId } ?? HarnessOption.catalog[0] + } + + private var selectedModel: ModelOption { + selectedHarness.models.first { $0.id == modelId } ?? ModelOption.defaultOption + } + var body: some View { ScrollView { VStack(alignment: .leading, spacing: HudSpacing.xxl) { projectSection instructionsSection + agentSection if let errorText { Text(errorText) .font(HudFont.mono(HudTextSize.xs)) @@ -50,15 +107,70 @@ struct NewSessionSurface: View { onClose: { self.route = nil } ) } + .sheet(isPresented: $showProjectPicker) { + ProjectPickerSheet(client: client, projectPath: $projectPath) + } } + // MARK: - Project + + /// The project row reads as a value — name on top, parent dimmed beneath — + /// and the whole row taps through to the known-projects tree. 
private var projectSection: some View { VStack(alignment: .leading, spacing: HudSpacing.lg) { HudSectionLabel("Project") - HudField("Project path", text: $projectPath, icon: "folder") + Button { + showProjectPicker = true + } label: { + HStack(spacing: HudSpacing.md) { + Image(systemName: "folder") + .font(HudFont.ui(HudTextSize.md)) + .foregroundStyle(HudPalette.muted) + VStack(alignment: .leading, spacing: 2) { + Text(projectLeaf.isEmpty ? "Choose a project" : projectLeaf) + .font(HudFont.ui(HudTextSize.base, weight: .medium)) + .foregroundStyle(projectLeaf.isEmpty ? HudPalette.dim : HudPalette.ink) + .lineLimit(1) + if !projectParent.isEmpty { + Text(projectParent) + .font(HudFont.mono(HudTextSize.xxs)) + .foregroundStyle(HudPalette.dim) + .lineLimit(1) + .truncationMode(.head) + } + } + Spacer(minLength: HudSpacing.md) + Image(systemName: "chevron.right") + .font(HudFont.ui(HudTextSize.sm, weight: .semibold)) + .foregroundStyle(HudPalette.muted) + } + .padding(HudSpacing.lg) + .frame(maxWidth: .infinity, alignment: .leading) + .background(RoundedRectangle(cornerRadius: HudRadius.standard, style: .continuous).fill(HudSurface.inset)) + .overlay( + RoundedRectangle(cornerRadius: HudRadius.standard, style: .continuous) + .stroke(HudHairline.standard, lineWidth: HudStrokeWidth.standard) + ) + } + .buttonStyle(.plain) } } + private var projectLeaf: String { + (trimmedProjectPath as NSString).lastPathComponent + } + + private var projectParent: String { + let parent = (trimmedProjectPath as NSString).deletingLastPathComponent + return parent == "/" || parent == trimmedProjectPath ? 
"" : parent + } + + private var trimmedProjectPath: String { + projectPath.trimmingCharacters(in: .whitespacesAndNewlines) + } + + // MARK: - Instructions + private var instructionsSection: some View { VStack(alignment: .leading, spacing: HudSpacing.lg) { HudSectionLabel("Instructions") @@ -77,6 +189,87 @@ struct NewSessionSurface: View { } } + // MARK: - Agent + + /// Harness · model · target on one calm line. They lead with the default and + /// present as values (ink text + a small caret), not loud controls; the + /// harness and model carets open inline menus. Target is read-only until + /// multi-machine routing exists. + private var agentSection: some View { + VStack(alignment: .leading, spacing: HudSpacing.lg) { + HudSectionLabel("Agent") + HStack(spacing: HudSpacing.md) { + choiceMenu(value: selectedHarness.label) { + Picker("Harness", selection: $harnessId) { + ForEach(HarnessOption.catalog) { harness in + Text(harness.label).tag(harness.id) + } + } + .pickerStyle(.inline) + } + tokenSeparator + choiceMenu(value: selectedModel.label) { + Picker("Model", selection: $modelId) { + ForEach(selectedHarness.models) { model in + Text(model.label).tag(model.id) + } + } + .pickerStyle(.inline) + } + Spacer(minLength: HudSpacing.md) + targetToken + } + .padding(HudSpacing.lg) + .frame(maxWidth: .infinity, alignment: .leading) + .background(RoundedRectangle(cornerRadius: HudRadius.standard, style: .continuous).fill(HudSurface.inset)) + .overlay( + RoundedRectangle(cornerRadius: HudRadius.standard, style: .continuous) + .stroke(HudHairline.standard, lineWidth: HudStrokeWidth.standard) + ) + // The menus inherit the system blue tint by default; pull them onto + // the cockpit accent so the open menu reads with the rest of the app. + .tint(HudPalette.accent) + } + // A model belongs to its harness — switching harness drops back to the + // harness's Default rather than carrying a now-invalid model id. 
+ .onChange(of: harnessId) { _, _ in modelId = ModelOption.defaultId } + } + + /// A value token that opens a menu on tap: ink text + a small muted caret. + private func choiceMenu(value: String, @ViewBuilder menu: () -> Menu) -> some View { + SwiftUI.Menu { + menu() + } label: { + HStack(spacing: HudSpacing.xxs) { + Text(value) + .font(HudFont.ui(HudTextSize.sm, weight: .medium)) + .foregroundStyle(HudPalette.ink) + Image(systemName: "chevron.down") + .font(HudFont.ui(HudTextSize.micro, weight: .semibold)) + .foregroundStyle(HudPalette.muted) + } + } + } + + private var tokenSeparator: some View { + Text("·") + .font(HudFont.ui(HudTextSize.sm)) + .foregroundStyle(HudPalette.dim) + } + + private var targetToken: some View { + HStack(spacing: HudSpacing.xs) { + HudStatusDot(color: targetMachineName == nil ? HudPalette.muted : HudPalette.accent, size: 6) + Text(targetMachineName ?? "Not connected") + .font(HudFont.ui(HudTextSize.xs, weight: .medium)) + .foregroundStyle(targetMachineName == nil ? 
HudPalette.dim : HudPalette.muted) + .lineLimit(1) + .truncationMode(.tail) + } + } + + // MARK: - Result + private func resultCard(_ result: SessionInitiationResult) -> some View { HudCard { VStack(alignment: .leading, spacing: HudSpacing.md) { @@ -109,6 +302,8 @@ struct NewSessionSurface: View { } } + // MARK: - Footer + private var footer: some View { HStack { if isSubmitting { @@ -123,14 +318,14 @@ struct NewSessionSurface: View { } private var canSubmit: Bool { - !projectPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + !trimmedProjectPath.isEmpty } private func makeSpec() -> SessionInitiationSpec { let trimmedInstructions = instructions.trimmingCharacters(in: .whitespacesAndNewlines) return SessionInitiationSpec( - target: .init(projectPath: projectPath.trimmingCharacters(in: .whitespacesAndNewlines)), - execution: .init(session: .new), + target: .init(projectPath: trimmedProjectPath), + execution: .init(harness: harnessId, model: selectedModel.value, session: .new), agent: .init(persistence: "sticky"), seed: .init(instructions: trimmedInstructions.isEmpty ? nil : trimmedInstructions) ) @@ -162,8 +357,185 @@ struct NewSessionSurface: View { /// Title for the pushed conversation: the project's last path component, /// falling back to a generic label. private var sessionTitle: String { - let trimmed = projectPath.trimmingCharacters(in: .whitespacesAndNewlines) - let last = (trimmed as NSString).lastPathComponent + let last = projectLeaf return last.isEmpty ? "New session" : last } } + +/// Known-projects picker: a tree of the project roots Scout has seen (grouped by +/// parent directory), plus a manual path field for anything not yet known. The +/// known list comes from the broker's sessions — empty until connected, which +/// is why the manual field always stays available. 
+private struct ProjectPickerSheet: View { + let client: any ScoutBrokerClient + @Binding var projectPath: String + @Environment(\.dismiss) private var dismiss + + @State private var groups: [ProjectGroup] = [] + @State private var isLoading = true + @State private var loadError: String? + @State private var manualPath: String = "" + + private struct ProjectGroup: Identifiable { + let id: String // parent directory + let parent: String + let projects: [Project] + } + + private struct Project: Identifiable { + let id: String // full path + let name: String + let path: String + } + + var body: some View { + NavigationStack { + ScrollView { + VStack(alignment: .leading, spacing: HudSpacing.xxl) { + manualSection + knownSection + } + .padding(HudSpacing.xxl) + } + .background(HudPalette.bg) + .navigationTitle("Choose project") + .navigationBarTitleDisplayMode(.inline) + .toolbar { + ToolbarItem(placement: .topBarTrailing) { + Button("Done") { dismiss() } + .foregroundStyle(HudPalette.accent) + } + } + } + .preferredColorScheme(.dark) + .tint(HudPalette.accent) + .task { await load() } + } + + // MARK: Manual entry + + private var manualSection: some View { + VStack(alignment: .leading, spacing: HudSpacing.lg) { + HudSectionLabel("Path") + HudField("Project path", text: $manualPath, icon: "folder") + HStack { + Spacer() + HudButton("Use this path", icon: "arrow.right", style: .secondary) { + commit(manualPath) + } + .disabled(manualPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) + } + } + } + + // MARK: Known projects + + private var knownSection: some View { + VStack(alignment: .leading, spacing: HudSpacing.lg) { + HudSectionLabel("Known projects") + if isLoading { + HStack(spacing: HudSpacing.md) { + ProgressView().controlSize(.small) + Text("Loading…") + .font(HudFont.ui(HudTextSize.sm)) + .foregroundStyle(HudPalette.muted) + } + .padding(.vertical, HudSpacing.lg) + } else if let loadError { + Text(loadError) + .font(HudFont.mono(HudTextSize.xs)) + 
.foregroundStyle(HudPalette.statusError) + .fixedSize(horizontal: false, vertical: true) + } else if groups.isEmpty { + HudEmptyState(title: "No known projects yet", icon: "folder") + } else { + VStack(alignment: .leading, spacing: HudSpacing.sm) { + ForEach(groups) { group in + DisclosureGroup { + VStack(alignment: .leading, spacing: 0) { + ForEach(group.projects) { project in + projectRow(project) + } + } + .padding(.top, HudSpacing.xs) + } label: { + Text(group.parent) + .font(HudFont.mono(HudTextSize.xxs, weight: .semibold)) + .foregroundStyle(HudPalette.muted) + .lineLimit(1) + .truncationMode(.head) + } + } + } + } + } + } + + private func projectRow(_ project: Project) -> some View { + Button { + commit(project.path) + } label: { + HStack(spacing: HudSpacing.md) { + Image(systemName: "folder.fill") + .font(HudFont.ui(HudTextSize.sm)) + .foregroundStyle(HudPalette.dim) + Text(project.name) + .font(HudFont.ui(HudTextSize.sm, weight: .medium)) + .foregroundStyle(HudPalette.ink) + .lineLimit(1) + Spacer(minLength: HudSpacing.md) + if project.path == projectPath.trimmingCharacters(in: .whitespacesAndNewlines) { + Image(systemName: "checkmark") + .font(HudFont.ui(HudTextSize.xs, weight: .bold)) + .foregroundStyle(HudPalette.accent) + } + } + .padding(.vertical, HudSpacing.sm) + .padding(.leading, HudSpacing.lg) + .frame(maxWidth: .infinity, alignment: .leading) + .contentShape(Rectangle()) + } + .buttonStyle(.plain) + } + + // MARK: Data + + private func commit(_ path: String) { + let trimmed = path.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return } + projectPath = trimmed + dismiss() + } + + private func load() async { + if manualPath.isEmpty { manualPath = projectPath } + do { + let sessions = try await client.listSessions(query: nil, limit: 200) + let roots = sessions.compactMap { session -> String? 
in + guard let root = session.workspaceRoot?.trimmingCharacters(in: .whitespacesAndNewlines), + !root.isEmpty else { return nil } + return root + } + groups = Self.group(Array(Set(roots))) + isLoading = false + } catch { + loadError = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription + isLoading = false + } + } + + private static func group(_ roots: [String]) -> [ProjectGroup] { + let byParent = Dictionary(grouping: roots) { ($0 as NSString).deletingLastPathComponent } + return byParent + .map { parent, paths in + ProjectGroup( + id: parent, + parent: parent, + projects: paths + .sorted { ($0 as NSString).lastPathComponent.localizedCaseInsensitiveCompare(($1 as NSString).lastPathComponent) == .orderedAscending } + .map { Project(id: $0, name: ($0 as NSString).lastPathComponent, path: $0) } + ) + } + .sorted { $0.parent.localizedCaseInsensitiveCompare($1.parent) == .orderedAscending } + } +} diff --git a/apps/ios/ScoutNext/RootView.swift b/apps/ios/ScoutNext/RootView.swift index 9f5a7d28..eed03b09 100644 --- a/apps/ios/ScoutNext/RootView.swift +++ b/apps/ios/ScoutNext/RootView.swift @@ -14,6 +14,12 @@ struct RootView: View { private var client: any ScoutBrokerClient { model.client } + /// Friendly name of the Mac we're connected to, for the New composer's + /// read-only target. nil when unconnected. + private var activeMachineName: String? 
{ + model.pairedMachines.first(where: { $0.isActive })?.name + } + enum Surface: String, CaseIterable, Identifiable { case home = "Home" case agents = "Agents" @@ -58,7 +64,7 @@ struct RootView: View { case .agents: AgentsSurface(client: client, reloadToken: model.dataReadyToken) case .comms: CommsSurface(client: client, reloadToken: model.dataReadyToken) case .terminal: TerminalSurface(client: client, reloadToken: model.dataReadyToken, connectedHost: model.terminalSSHHost) - case .new: NewSessionSurface(client: client) + case .new: NewSessionSurface(client: client, targetMachineName: activeMachineName) } } .frame(maxWidth: .infinity, maxHeight: .infinity) @@ -140,7 +146,7 @@ struct RootView: View { } .buttonStyle(.plain) } - .padding(.horizontal, HudSpacing.xxxl) + .padding(.horizontal, HudSpacing.xxl) .padding(.top, HudSpacing.lg) .padding(.bottom, HudSpacing.xl) } diff --git a/design/studio/app/studies/search-results/page.tsx b/design/studio/app/studies/search-results/page.tsx new file mode 100644 index 00000000..fdcfe8a1 --- /dev/null +++ b/design/studio/app/studies/search-results/page.tsx @@ -0,0 +1,705 @@ +/** + * Search Results — design study. + * + * The brief (operator follow-up): the shipped Knowledge Search surface + * reads like an index dump — chunk titles ("Events window 3"), raw + * snippets with `[0234] command_or_tool` markers, "Index rank 0.000", + * and a wall of raw JSONL. People bounce off it. This study is the + * design answer, rendered in the studio instead of a standalone HTML + * page so it lives next to the other web studies and inherits the real + * token theme. + * + * The idiom this study commits to: **a result is a moment in a + * conversation, not a chunk in an index.** Every surface leads with + * human content; the index machinery (score, chunk, QMD, raw JSONL) is + * always exactly one disclosure away, never the headline. + * + * Four moves carry the whole redesign: + * 1. 
One card per *session*, not per chunk (collapse the repetition). + * 2. A conversation-first inspector — rendered turns, tools folded. + * 3. Rendered by default; raw JSONL is one deliberate click deep. + * 4. Always a next move — "Open conversation" + "Fork from here". + * + * Everything here is static. No props change, no motion. Each cell is a + * frozen frame so the change reads at a glance against the "before". + * + * Companion spec: docs/eng/reviews/knowledge-search-redesign-dewey.md + * Maps to: packages/web/client/screens/KnowledgeSearchScreen.tsx, + * packages/web/client/screens/KnowledgeSearchInspector.tsx + */ + +import type { CSSProperties, ReactNode } from "react"; + +// ── shared style fragments ────────────────────────────────────────── + +const PANEL: CSSProperties = { + background: + "linear-gradient(180deg, color-mix(in oklab, var(--studio-surface) 70%, transparent), transparent 180px), var(--studio-surface)", +}; +const SELECTED_CARD: CSSProperties = { + borderLeft: "2px solid var(--scout-accent)", + background: "color-mix(in oklab, var(--scout-accent) 7%, var(--studio-surface))", +}; +const MARK: CSSProperties = { + background: "color-mix(in oklab, var(--scout-accent) 26%, transparent)", + borderRadius: 3, + padding: "0 2px", + color: "var(--studio-ink)", +}; +const PRIMARY_BTN: CSSProperties = { + background: "var(--scout-accent)", + color: "oklch(0.2 0.05 145)", + borderColor: "var(--scout-accent)", +}; + +// ── atoms ─────────────────────────────────────────────────────────── + +function Mark({ children }: { children: ReactNode }) { + return {children}; +} + +function SectionTitle({ + children, + hint, + className = "", +}: { + children: ReactNode; + hint?: string; + className?: string; +}) { + return ( +
+
+

+ {children} +

+ {hint && ( + + {hint} + + )} +
+
+ ); +} + +function Pips({ level, tone = "accent" }: { level: 1 | 2 | 3 | 4; tone?: "accent" | "warm" }) { + const onColor = tone === "warm" ? "var(--status-warn-fg)" : "var(--scout-accent)"; + return ( + + {[1, 2, 3, 4].map((i) => ( + + {i <= level ? "●" : "○"} + + ))} + + ); +} + +function Chip({ children, primary = false }: { children: ReactNode; primary?: boolean }) { + return ( + + {children} + + ); +} + +function CalloutDot({ letter }: { letter: string }) { + return ( + + {letter} + + ); +} + +// ── result cards ──────────────────────────────────────────────────── + +/** The "before" — what ships today: chunk title, raw event-window + * snippet with record markers, index-rank reason. Flat, machine-ish. */ +function BeforeCard() { + return ( +
+
+ 📄 + Events window 3 +
+

+ - [0313] `command_or_tool` (tool_use) - we should embed selected QMD chunks, not raw + source files {"{"}"provider":"…"{"}"} +

+
Matched 2 query terms in indexed QMD
+
+ openscout + claude + records 312..340 +
+ + ~/.../3f9c…session.jsonl + +
+ ); +} + +interface MomentRow { + idx: string; + kind: string; + quote: string; + level: 1 | 2 | 3 | 4; +} + +/** The "after" — a session card. Headline is the session, the match is + * a clean rendered quote, the reason names term + role, machinery is a + * qualitative strength. Multi-moment cards collapse the repetition. */ +function SessionCard({ + harness, + glyph, + collection, + fresh, + title, + quote, + reasonTerm, + reasonRole, + strength, + level, + moments, + selected = false, + expanded = false, + showActions = false, +}: { + harness: string; + glyph: string; + collection: string; + fresh: string; + title: string; + quote: ReactNode; + reasonTerm: string; + reasonRole: string; + strength: string; + level: 1 | 2 | 3 | 4; + moments?: MomentRow[]; + selected?: boolean; + expanded?: boolean; + showActions?: boolean; +}) { + const tone = level >= 3 ? "accent" : "warm"; + return ( +
+
+ {glyph} + {harness} + · + {collection} + {fresh} +
+

{title}

+

{quote}

+
+ + Matched “{reasonTerm}” in {reasonRole} + + + {strength} + +
+ + {moments && moments.length > 0 && ( + <> +
+ {expanded ? "▾" : "▸"} {moments.length} matches in this session +
+ {expanded && ( +
+ {moments.map((m) => ( +
+ {m.idx} + {m.kind} + {m.quote} + = 3 ? "accent" : "warm"} /> +
+ ))} +
+ )} + + )} + + {showActions && ( +
+ ▸ Open conversation + ⤴ Fork + ⧉ Copy ref + +
+ )} +
+ ); +} + +// ── inspector ─────────────────────────────────────────────────────── + +function Turn({ + role, + id, + matched = false, + children, +}: { + role: "user" | "assistant"; + id: string; + matched?: boolean; + children: ReactNode; +}) { + const edge = matched + ? "var(--scout-accent)" + : role === "user" + ? "oklch(0.6 0.12 270)" + : "color-mix(in oklab, var(--scout-accent) 45%, var(--studio-edge))"; + return ( +
+
+ {role} + {id} + {matched && ( + + ● matched + + )} +
+
{children}
+
+ ); +} + +function ToolFold() { + return ( +
+ ▸ 3 tool steps  (Read, Bash, Edit) +
+ ); +} + +function Disclosure({ + summary, + meta, + open = false, + children, +}: { + summary: ReactNode; + meta?: ReactNode; + open?: boolean; + children?: ReactNode; +}) { + return ( +
+
+ {open ? "▾" : "▸"} + {summary} + {meta && {meta}} +
+ {open &&
{children}
} +
+ ); +} + +const RAW_USER = `{ "type":"message","role":"user", + "content":[{"type":"text", + "text":"how should we store embeddings for the chunks?"}] }`; +const RAW_ASSISTANT = `{ "type":"message","role":"assistant", + "content":[{"type":"text","text":"We should embed selected + QMD chunks, not raw source files, and record provider, + model, and dimensions…"}] }`; + +function Inspector() { + return ( +
+ {/* head */} +
+
+ ◆ claude + · + openscout + · + 2d ago + + ✕ + +
+

+ Designing the QMD knowledge search index +

+
+ + ▸ Open conversation + + + ⤴ Fork from here + + + ⧉ Copy ref + + + ⤓ Raw + +
+
+ + {/* conversation excerpt */} +
+ Conversation + + records 308–340 + +
+
+ + how should we store embeddings for the chunks? + + + We should embed selected QMD chunks, not raw source files, and record + provider, model, and dimensions so the index stays rebuildable when the chunk policy + changes. + + + + …that keeps the index rebuildable without re-embedding everything when only the + lexical layer changes. + +
+ + {/* collapsed disclosures */} +
+ + Raw evidence (advanced)} meta="records 308–344" /> +
+ + {/* provenance footer */} +
+
+ derived + observed source + indexed 2d ago · mechanical extraction +
+
+ ~/.claude/projects/openscout/3f9c…session.jsonl + ⧉ copy +
+
+
+ ); +} + +// ── page ──────────────────────────────────────────────────────────── + +export default function SearchResultsStudy() { + return ( +
+
+
+ · studies · web · search results +
+

+ Search results +

+

+ The shipped Search surface reads like an index dump — chunk titles, raw event-window + snippets, “index rank 0.000”, a wall of JSONL. This study commits to one idea: a + result is a moment in a conversation, not a chunk + in an index. Human content leads; the machinery (score, chunk, QMD, raw JSONL) is + always one disclosure away. Four moves carry it — one card per{" "} + session, a conversation-first inspector,{" "} + rendered by default, and always a next move. +

+
+ + {/* 1 — before / after */} + Before → after +
+
+
+ today — the index shows through +
+ +

+ The chunk title is the document H1. The snippet is raw event-window text with{" "} + [0313] command_or_tool markers. The + reason speaks index-ese. Four near-identical chunk hits from one session stack as + four of these. +

+
+
+
+ redesign — the conversation shows through +
+ + “…we should embed selected QMD chunks, not raw source files, and + record the provider, model, and dimensions so the index stays rebuildable…” + + } + reasonTerm="embeddings" + reasonRole="an assistant reply" + strength="Strong" + level={3} + moments={[ + { idx: "0313", kind: "assistant reply", quote: "…embed selected QMD chunks, not raw…", level: 3 }, + { idx: "0420", kind: "tool output", quote: "embeddings provider model dimensions", level: 2 }, + { idx: "0511", kind: "your message", quote: "should embeddings be opt-in?", level: 1 }, + ]} + selected + expanded + showActions + /> +

+ One card per session. Headline is the session, not the chunk. The match is a clean + rendered quote; the reason names the term and the role; rank becomes a qualitative + strength. The four chunk hits collapse into three labelled moments. +

+
+
+ + {/* 2 — card anatomy */} + The result card +
+ + “…we should embed selected QMD chunks, not raw source files, and record + the provider, model, and dimensions…” + + } + reasonTerm="embeddings" + reasonRole="an assistant reply" + strength="Strong" + level={3} + moments={[ + { idx: "0313", kind: "assistant reply", quote: "…embed selected QMD chunks, not raw…", level: 3 }, + { idx: "0420", kind: "tool output", quote: "embeddings provider model dimensions", level: 2 }, + ]} + showActions + /> +
    + {[ + ["A", "Identity row", "harness glyph · project · collection, freshness right-aligned. Muted — context, not headline."], + ["B", "Headline = session title", "New sessionTitle field (AI title → first user prompt → project·topic). Never the chunk H1."], + ["C", "Rendered match quote", "One clean sentence of renderedText, terms highlighted. No [NNNN] markers — rendered server-side."], + ["D", "Reason + strength", "“Matched ‘x’ in an assistant reply” + qualitative Strong/Good/Weak pips. Never the raw bm25 float."], + ["E", "Moments toggle", "Multiple chunk hits from one session collapse here; expand to labelled moment rows."], + ["F", "Hover actions", "Open conversation (primary) · Fork · Copy ref · ⋯. The card always offers a next move."], + ].map(([letter, title, body]) => ( +
  1. + + + {title} — {body} + +
  2. + ))} +
+
+ + {/* 3 — card states */} + Card states +
+
+
collapsed · multi-moment
+ “brute-force cosine over embeddings in knowledge.sqlite is fine under ~100k chunks…”} + reasonTerm="embeddings" reasonRole="2 assistant replies" strength="Good" level={2} + moments={[ + { idx: "0088", kind: "assistant reply", quote: "brute-force cosine…", level: 2 }, + { idx: "0142", kind: "assistant reply", quote: "sqlite-vec vs in-memory…", level: 2 }, + ]} + /> +
+
+
selected · expanded · hover
+ “…we should embed selected QMD chunks, not raw source files…”} + reasonTerm="embeddings" reasonRole="an assistant reply" strength="Strong" level={3} + moments={[ + { idx: "0313", kind: "assistant reply", quote: "…embed selected QMD chunks…", level: 3 }, + { idx: "0420", kind: "tool output", quote: "embeddings provider model…", level: 2 }, + ]} + selected expanded showActions + /> +
+
+
single moment · no toggle
+ “keep embeddings disabled by default until the user enables semantic search…”} + reasonTerm="embeddings" reasonRole="your message" strength="Weak" level={1} + /> +
+
+ + {/* 4 — the inspector */} + The selected-result inspector +

+ Reads top-to-bottom as a conversation with zero clicks. Header carries the session title + and the next-action bar; the body is the rendered slice around the match with tool steps + folded; “Why this matched” and “Raw evidence” sit collapsed beneath; a quiet provenance + footer holds the trust signals (origin, ownership, path). +

+
+ +
+ + {/* 5 — rendered vs raw */} + Rendered vs. raw JSONL +
+
+
default — rendered
+
+ how should we store embeddings for the chunks? + + We should embed selected QMD chunks, not raw source files, and record + provider, model, and dimensions. + + +
+

+ Role-labelled turns from renderedText; + matched turn highlighted and auto-scrolled; tool/process records folded, never + inline-equal. +

+
+
+
one click deep — raw evidence (advanced)
+ Raw evidence (advanced)} + meta="records 308–344" + open + > +
+
+ 0312user + how should we store embeddings… +
+
+
+ 0313 + assistant + We should embed selected QMD chunks… +
+
{RAW_ASSISTANT}
+
+
+ 0314tool_use + Read provider.ts +
+
+
+

+ The existing record window, kept verbatim for trust — but behind one deliberate + disclosure, with the matched record auto-open. Never the default lower half. +

+
+
+ + {/* 6 — ranking explanation */} + Why this matched +
+
+
collapsed
+ +
+
+
expanded
+ +
+ Relevance + Strong + Found in + assistant reply (2×) · session title + Match type + Exact words: “embeddings”, “QMD” + details + bm25 −8.42 · fts over title + body +
+
+

+ Field names humanised (“session title”, “assistant reply”), not “QMD title/body”. For + vector/hybrid this line becomes “Similar in meaning (not exact words)”. +

+
+
+ + {/* 7 — next actions */} + Next actions +
+
+ ▸ Open conversation + ⤴ Fork from here + ⧉ Copy ref + ⤓ Raw + +
+
    +
  • Open conversation (primary) — deep-link into the existing tail view at the matched record. Today’s only action (“Open file”) dumps raw JSONL; this is the move people expect.
  • +
  • Fork from here — seed a new session from this context (sco-049 / sco-062). Stub allowed; the affordance sets direction.
  • +
  • Copy ref · Raw · overflow holds Open transcript file & Search-within-session.
  • +
  • Card default click = preview; Enter = Open conversation.
  • +
+
+ + {/* how to read */} +
+
+ · how to read this study +
+

+ Every frame is static — frozen so the redesign reads against the “before” without + motion. The only required backend addition is a human{" "} + sessionTitle on the hit plus a + server-rendered card snippet; everything else reorders or renames fields that already + exist on KnowledgeHit and the source + preview. Build order and field mappings live in the companion spec,{" "} + docs/eng/reviews/knowledge-search-redesign-dewey.md. + Append ?focus=1 to the URL for a + chrome-free screenshot frame. +

+
+
+ ); +} diff --git a/design/studio/lib/studio-pages.ts b/design/studio/lib/studio-pages.ts index 1aaec663..45a7dfd3 100644 --- a/design/studio/lib/studio-pages.ts +++ b/design/studio/lib/studio-pages.ts @@ -193,6 +193,21 @@ export const STUDIO_PAGES: StudioPage[] = [ ], blurb: "Read-only window into the session-search index.db — schema, FTS5 MATCH, ad-hoc SELECT, schema-aware shortcuts.", }, + { + href: "/studies/search-results", + label: "Search Results", + bucket: "studies", + surface: "web", + family: "session-search", + status: "concept", + source: [ + "design/studio/app/studies/search-results/page.tsx", + "packages/web/client/screens/KnowledgeSearchScreen.tsx", + "packages/web/client/screens/KnowledgeSearchInspector.tsx", + "docs/eng/reviews/knowledge-search-redesign-dewey.md", + ], + blurb: "Result-experience redesign: session cards, conversation-first inspector, rendered vs raw, ranking, next actions.", + }, { href: "/studies/tree-viewer", label: "Tree Viewer", diff --git a/docs/eng/reviews/knowledge-search-redesign-dewey.md b/docs/eng/reviews/knowledge-search-redesign-dewey.md new file mode 100644 index 00000000..38f6f364 --- /dev/null +++ b/docs/eng/reviews/knowledge-search-redesign-dewey.md @@ -0,0 +1,288 @@ +# Design: OpenScout Knowledge Search result experience + +Author: **dewey**. Status: proposal for implementation. No production source mutated. +Companion mock: [`knowledge-search-redesign-mock.html`](./knowledge-search-redesign-mock.html) +(open in a browser). Source critique this builds on: +[`knowledge-search-ux-consult-dewey.md`](./knowledge-search-ux-consult-dewey.md). + +This is the "give us a design" follow-up: concrete layouts, states, copy, and field mappings +an implementer can build against `KnowledgeSearchScreen.tsx` / `KnowledgeSearchInspector.tsx` +without inventing structure. + +--- + +## Design principles + +1. 
**A result is a moment in a conversation, not a chunk in an index.** Every surface leads + with human content; index machinery (score, chunk, QMD, raw JSONL) is one disclosure away. +2. **One card per session.** Collapse the N near-identical chunk hits into a single + session card with expandable moments. Repetition is what makes it read like a log. +3. **Rendered by default, raw on demand.** `renderedText` everywhere first; raw JSONL behind a + collapsed "advanced" disclosure; tool/process records folded, never inline equals. +4. **Always offer a next move.** The card and inspector both answer "what now" with a primary + *Open conversation* and a *Fork from here*. + +--- + +## Information model + +A search returns chunk-level `KnowledgeHit`s today. The UI should group them into **session +results** client-side (key = `collectionId`), each holding 1..n **moments** (the individual +hits, each anchored to a `recordRange`). + +``` +SessionResult + collectionId + sessionTitle ← NEW backend field (see §8). Fallbacks: AI title → first user prompt → "project · topic" + harness, project, freshness (from hit.facets / hit.freshness) + bestMoment ← highest-ranked hit + moments[] ← all hits in this collection, sorted by score + matchCount ← moments.length +``` + +Everything below renders from fields that already exist on `KnowledgeHit` / +`KnowledgeSourcePreviewRecord` except `sessionTitle` (§8). + +--- + +## 1. 
Result card + +### Anatomy + +``` +┌────────────────────────────────────────────────────────────────────┐ +│ ◆ claude openscout · knowledge index 2d ago │ identity row +│ Designing the QMD knowledge search index │ headline = sessionTitle +│ “…we should embed selected QMD chunks, not raw source files, and │ rendered match quote +│ record the provider, model, and dimensions…” │ (bestMoment.renderedText) +│ Matched “embeddings” in an assistant reply Strong ●●●○ │ reason + strength +│ ▸ 3 matches in this session │ moments toggle (n>1 only) +└────────────────────────────────────────────────────────────────────┘ + hover ▸ [ Open conversation ] ⤴ Fork ⧉ Copy ref ⋯ +``` + +### Field mapping + +| Slot | Source | Notes | +| --- | --- | --- | +| harness icon + label | `facetText(hit,"harness")` | `◆`=claude, `◇`=codex; muted | +| project · collection | `facetText(hit,"project")` + collection title | middle-dot separated, muted | +| freshness | `hit.freshness` | right-aligned chip ("2d ago") | +| **headline** | `sessionTitle` (§8) | never the chunk H1 ("Events window 3") | +| match quote | `bestMoment.renderedText`, cleaned | strip `[NNNN] \`kind\`` markers server-side (§8); `` terms | +| reason | matched term + matched record role | "Matched **'x'** in an assistant reply" | +| strength | bucketed score | `●●●○` Strong / Good / Weak; never raw float | +| moments toggle | `moments.length` | only when > 1; expands inline to a compact moment list | + +### Expanded moments (inline, when card has >1 match) + +``` + ▾ 3 matches in this session + 0313 assistant reply “…embed selected QMD chunks, not raw…” ●●●○ + 0420 tool output “embeddings provider model dimensions” ●●○○ + 0511 your message “should embeddings be opt-in?” ●○○○ +``` + +Each moment row is itself selectable → opens the inspector deep-linked to that `recordRange`. + +--- + +## 2. 
Selected-result inspector + +### Anatomy + +``` +┌─ INSPECTOR ────────────────────────────────────────────────────────┐ +│ ◆ claude · openscout · 2d ago [ ✕ ] │ context + close +│ Designing the QMD knowledge search index │ sessionTitle +│ [ Open conversation ] ⤴ Fork from here ⧉ Copy ref ⤓ Raw │ ACTION BAR (primary left) +├─────────────────────────────────────────────────────────────────────┤ +│ CONVERSATION · records 308–340 │ +│ │ +│ ┌ user · 0312 │ +│ └ how should we store embeddings for the chunks? │ +│ │ +│ ┌ assistant · 0313 ● matched │ +│ └ We should **embed selected QMD chunks**, not raw source files, │ highlighted +│ and record provider, model, and dimensions so the index … │ +│ │ +│ ▸ 3 tool steps (Read, Bash, Edit) │ folded tool noise +│ │ +│ ┌ assistant · 0339 │ +│ └ …that keeps the index rebuildable without re-embedding. │ +│ │ +│ ▸ Why this matched Strong · exact words │ collapsed +│ ▸ Raw evidence (advanced) │ collapsed +│ │ +│ derived · observed source · indexed 2d ago │ provenance footer +│ ~/.claude/projects/openscout/.jsonl ⧉ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### Sections, top → bottom + +1. **Context + close** — `harness · project · freshness`, close `✕` → `clearKnowledgeHit()`. +2. **Headline** — `sessionTitle`. Replaces "Selected result / ". +3. **Action bar** — see §5. Primary action is a filled button, left-most. +4. **Conversation excerpt** (default body) — the rendered slice; the centerpiece. See §4. +5. **Why this matched** — collapsed; one-line summary in the header row; expands to detail (§6). +6. **Raw evidence (advanced)** — collapsed; the existing `
` record window (§4). +7. **Provenance footer** — `origin` (mechanical/enrichment), `ownership` (derived/observed), + indexed-at, transcript path + copy. Small, persistent, monospace path. + +The **Indexer tab** stays as-is (it's an operator surface, not a result surface) — but move it +out of the per-result inspector into a top-level "Index" affordance so the result inspector is +purely about the selected result. (Optional, lower priority.) + +--- + +## 3. Interaction states + +| State | Trigger | Card list | Inspector | +| --- | --- | --- | --- | +| **No index** | `status.chunks === 0` | empty-state w/ "Build 3-day index" | "Select a result" placeholder | +| **Indexing** | `status.activeJobs[0]` present | skeleton cards + "Indexing… 42/260" | progress note | +| **Searching** | `searching` | keep prior results dimmed + top bar "Searching…" | keep prior selection | +| **Results** | hits > 0 | session cards | auto-select bestMoment of top card | +| **Empty query result** | hits === 0 | "No matches — try a project, file, or topic" | placeholder | +| **Card hover** | pointer | reveal action row, raise elevation | — | +| **Card selected** | click | `aria-pressed`, accent left-border | inspector populated | +| **Preview loading** | `loadingPreview` | — | excerpt skeleton ("Loading conversation…") | +| **No preview** | preview null (non-transcript ref) | — | show indexed snippet + "Raw evidence unavailable" | +| **Raw expanded** | user opens disclosure | — | record `
` list, matched auto-open | +| **Error** | `error` | inline `role="alert"` | inline alert, keep last good content | + +Keyboard: `↑/↓` move card selection, `Enter` = Open conversation, `Space` = expand moments, +`Esc` = clear selection. (Cards are already ` + +
7 matches across 3 sessions1,284 moments indexed
+ +
+ + + + + +
+
0088assistant replybrute-force cosine over embeddings…
+
0142assistant replysqlite-vec extension vs in-memory…
+
+
+ ▸ Open conversation⤴ Fork⧉ Copy ref +
+ + + + +
+ + + +
+
+
◆ claude·openscout·2d ago +
+

Designing the QMD knowledge search index

+
+ ▸ Open conversation + ⤴ Fork from here + ⧉ Copy ref + ⤓ Raw + +
+
+ +
Conversation records 308–340
+
+
+
user0312
+
how should we store embeddings for the chunks?
+
+
+
assistant0313● matched
+
We should embed selected QMD chunks, not raw source files, and record provider, model, and dimensions so the index stays rebuildable when the chunk policy changes.
+
+
▸ 3 tool steps  (Read, Bash, Edit)
+
+ 0314 Read packages/runtime/knowledge/embeddings/provider.ts
+ 0316 Bash node dist/cli index --source sessions
+ 0319 Edit knowledge.sqlite schema (embeddings table) +
+
+
assistant0339
+
…that keeps the index rebuildable without re-embedding everything when only the lexical layer changes.
+
+
+ +
+ ▸ Why this matched Strong · exact words +
+
+ RelevanceStrong ●●●○ + Found inassistant reply (2×) · session title + Match typeExact words: “embeddings”, “QMD” + detailsbm25 −8.42 · fts over title + body +
+
+
+ +
+ ▸ Raw evidence (advanced) records 308–344 · earlier hidden · later hidden +
+
0312userhow should we store embeddings… +
{ "type":"message","role":"user","content":[{"type":"text","text":"how should we store embeddings for the chunks?"}] }
+
0313assistantWe should embed selected QMD chunks… +
{ "type":"message","role":"assistant","content":[{"type":"text","text":"We should embed selected QMD chunks, not raw source files, and record provider, model, and dimensions…"}] }
+
0314tool_useRead provider.ts +
{ "type":"tool_use","name":"Read","input":{"file_path":"packages/runtime/knowledge/embeddings/provider.ts"} }
+
+
+ + +
+ + + + + diff --git a/docs/eng/reviews/knowledge-search-ux-consult-dewey.md b/docs/eng/reviews/knowledge-search-ux-consult-dewey.md new file mode 100644 index 00000000..a66955f4 --- /dev/null +++ b/docs/eng/reviews/knowledge-search-ux-consult-dewey.md @@ -0,0 +1,156 @@ +# UX Consult: OpenScout Knowledge Search results + +Reviewer: **dewey** (Claude-backed UX consult, requested by operator). No source mutations. + +Reviewed live: `http://127.0.0.1:3210/search` against +`packages/web/client/screens/KnowledgeSearchScreen.tsx`, +`KnowledgeSearchInspector.tsx`, `lib/knowledge-search.ts`, `knowledge-search.css`. + +## Root cause (one sentence) + +The data model is already rich — each hit carries `origin`, `ownership`, `freshness`, +`facets`, structured `sourceRefs`, and the preview exposes `renderedText` + per-record +`kind`/`role`/`matched` — but the **UI promotes the index's internal vocabulary to +primary content**: chunk titles like *"Events window 3"*, *"Indexed snippet"*, *"Index +rank 0.000 — lower sorts earlier"*, *"Raw JSONL evidence"*, *"matching chunks"*, *"derived +QMD chunks"*. Users see the machine that found the answer instead of the answer. Every fix +below is the same move: **lead with meaning, demote machinery to on-demand.** + +The good news: almost none of this needs new backend data. It's hierarchy, framing, and one +new field (a human session title on the hit). + +--- + +## 1. Result cards + +**Now:** title = raw QMD chunk H1 (`KnowledgeSearchScreen.tsx:252`, e.g. "Events window 3" +/ "Files touched" / "Tool calls"); snippet = event-window text still carrying +`- [0234] \`command_or_tool\`` markers that `displaySnippet()` tries to strip with a brittle +regex (`:95`); reason = "Matched N query terms in indexed QMD" (`:88`); a `` path block +competes with the content (`:264`). + +**Target card (top → bottom):** + +1. 
**Identity line** — the *session* title, not the chunk title: AI/derived session title → + else first user prompt → else `project · topic`. Prefix a harness icon (Claude/Codex) and a + freshness chip ("2d ago", from `hit.freshness`). If the chunk title is an event-window + label, never show it as the headline. +2. **The match as a clean quote** — one rendered sentence of the best matched *turn* + (`renderedText`), not the raw event-window line. Push the inspector's rendering into the hit + so the card is humanized server-side and `displaySnippet`'s regex can go away. Highlight terms. +3. **Context chips** — `project · harness · "in your message" | "in assistant reply" | "in tool + output" · freshness`. Naming *what kind of record matched* (from the matched record's + `kind`/`role`) is the cheapest way to make a card feel like a moment, not a row. +4. **Reason** — replace "Matched N query terms in indexed QMD" with "Matched **'embeddings'** + in an assistant reply." Name the term + the role; drop "indexed QMD." +5. **Path** — demote to muted/hover-only; it should not be a `` block at card altitude. + +**Dedupe by session.** Multiple chunk hits from one session currently render as N near-identical +cards — a primary driver of "feels like index records." Collapse to **one card per session** +with "3 matches · best: '…'"; expand to per-moment matches. Card = conversation, not chunk. + +## 2. Inspector + +**Now** (`KnowledgeSearchInspector.tsx`): Rendered hits (flat top-4, `:304`) → "Indexed +snippet" (`:324`) → "Why ranked here" with raw score + "lexical index over QMD title/body" +(`:329`) → transcript path → **Raw JSONL `
` list that occupies the whole lower panel** +(`:370`). It opens fine but slides into index-ese fast and raw JSONL dominates the viewport. + +**Target hierarchy (top → bottom):** + +1. **Header** — human session title + `harness · project · freshness` + the primary next-action + buttons (§5). Not "Selected result / ". +2. **Conversation excerpt (default, rendered)** — show the matched moment *in conversational + context*: the matched turn plus ~2 surrounding turns as role-labeled bubbles, terms + highlighted, **tool/system records folded** into "▸ 3 tool steps (Bash, Read)". This replaces + the flat top-4 "Rendered message hits" list and is the single highest-impact change — make the + default look like a conversation, not a record dump. You already have `renderedText`, + `recordKindLabel()`, and `recordPriority()` to drive the fold. +3. **Why this matched (collapsed)** — one plain line: "Matched 'embeddings' in 2 assistant + replies and 1 tool call." Expand for mechanics. See §4. +4. **Raw evidence (collapsed, advanced)** — keep the existing `
` record window; it's + great for trust, but it must be opt-in, not the default lower half. Keep matched record + auto-open (`firstOpenRecord`, `:166`). +5. **Provenance footer (persistent, small)** — `origin` (mechanical vs enrichment), `ownership` + (derived/observed), indexed-at, transcript path + copy. These fields already exist on the hit + and are exactly the trust signals to surface quietly. + +Drop the standalone "Indexed snippet" block — it's the chunk's raw text and reads as an index +artifact; the conversation excerpt supersedes it. + +## 3. Rendered vs raw JSONL + +- **Default is always rendered.** Cards and inspector lead with `renderedText` / conversation + rendering; raw JSONL is never the first thing shown. +- **Raw is explicit, on-demand.** Keep the `
` raw view behind a collapsed "Raw evidence + (advanced)" disclosure. Within it, matched record stays auto-expanded. +- **Tool/process noise** is the other half of the complaint. In the rendered excerpt, fold + tool/system records into a one-line summary ("▸ ran 3 tools") and expand on demand — don't + render tool calls as equal-weight bubbles. `recordPriority()` already ranks `assistant`/`user` + above `system` above tools; use it to *hide*, not just sort. +- Offer a per-record "rendered ⇄ raw" toggle for power users, but the panel-level default stays + rendered. + +## 4. Ranking explanation + +**Now:** "Index rank **0.000** — lower values sort earlier in lexical search"; "Matched in: +title, indexed snippet — lexical index over QMD title/body" (`:334`–`:349`). Accurate but +speaks index-ese, and the raw BM25 float as the headline is meaningless to a user. + +**Target:** +- Lead with a **plain sentence** built from matched terms + matched record roles: "Top match — + 'embeddings' appears in an assistant reply and the session title." +- Show a **qualitative strength** chip (Strong / Good / Weak) bucketed from normalized score; the + raw float lives only under "details." +- Translate fields to human terms: QMD `title`/`body` → "session title" / "your message" / + "assistant reply" / "tool output." Never show "QMD title/body." +- When `scoreSource` becomes `vector`/`hybrid`, say **"Similar in meaning (not exact words)"** vs + **"Exact words matched"** — that is the one ranking distinction users actually care about. +- Keep it to a single collapsed block, not the current three-column score panel. + +## 5. Next actions (the "unclear what to do after clicking" fix) + +**Now:** the only real action is "Open file" (`openTranscript`, `:246`) which dumps the raw +transcript, plus the Indexer tab. There is no "take me to the conversation" path — the most +expected action is missing. 
+ +Per the SCO-062 thesis (search = retrieval **and** launch), give a clear primary + a small set, +in the inspector header and as the card's click intent: + +- **Open conversation** (primary) — open the session in the existing tail/conversation view, deep + linked to the matched record via `recordRange` / `firstOpenRecord`. This is the natural "I found + it, take me there." Today's click only fills the inspector; "Open file" gives raw JSONL instead. +- **Continue / fork from here** — seed a new session from this context (the context-pack/fork path + from sco-049 / sco-062). Even stubbed, the affordance answers "what now" and sets product + direction. +- **Open raw transcript** — today's "Open file," demoted to secondary. +- **Copy reference** — path + record range, for pasting into an agent. +- **Search within this session** — scope the query to this collection. + +Make the card communicate its primary destination ("Open conversation"), not just "select." + +## 6. Vocabulary cleanup (cheap, cross-cutting, high impact) + +The internal terms *are* the "feels like index records" feeling. Rename user-facing strings: + +| Now | Use | +| --- | --- | +| "N matching chunks" / "derived QMD chunks" (`:228`) | "N matches" / "N moments" | +| "Indexed snippet" (`:324`) | drop (replaced by excerpt) | +| "Raw JSONL evidence" (`:371`) | "Raw evidence (advanced)" | +| "Index rank — lower sorts earlier" (`:336`) | "Relevance: Strong/Good/Weak" | +| placeholder "Search QMD, embeddings… raw log drilldown" (`:185`) | "Search your sessions — topics, files, decisions…" | +| sample queries "QMD / MCP / context pack" (`:25`) | real recent topics/projects | + +## Priority order + +1. **Conversation excerpt + fold tool noise** in the inspector (§2.2, §3) — kills most of the + "raw record" feeling. +2. **Session-level human title + render the card snippet server-side** (§1.1–1.2) — fixes the + first thing users read. +3. 
**"Open conversation" primary action + deep-link** (§5) — answers "what now." +4. **Dedupe cards by session** (§1) — removes repetitive index-row feel. +5. **Vocabulary pass + qualitative ranking** (§4, §6) — low effort, broad polish. + +None of 1–5 requires new index data beyond adding a human session title to the hit; everything +else is reordering and renaming fields that already exist. diff --git a/docs/eng/reviews/sco-062-review-dewey.md b/docs/eng/reviews/sco-062-review-dewey.md new file mode 100644 index 00000000..62569282 --- /dev/null +++ b/docs/eng/reviews/sco-062-review-dewey.md @@ -0,0 +1,238 @@ +# Review: SCO-062 QMD Knowledge Search And Context Index + +Reviewer: **dewey** (docs/indexing judgment). Scope as requested: QMD as the durable +derived layer, source refs, chunking, FTS + optional embeddings, and keeping generated +collections inspectable. + +Grounded against the doc (`docs/eng/sco-062-...md`) **and** the existing prototype +(`design/studio/lib/studio/commands/extract-qmd.ts`, `index-corpus.ts`). + +## Verdict + +The architecture is sound. QMD-as-durable-spine with rebuildable FTS/vector projections +is the right shape, and the data-ownership boundary is preserved correctly. The gaps are +**not** in the design thesis — they are in the *contracts* that make "durable, rebuildable, +inspectable" actually hold: manifest versioning, deterministic content, stable chunk +identity, and structured source refs. The prototype shows the gaps concretely: the manifest +is `{source, harness, recordsScanned, bytesRead, window, generatedAt}` with no version, and +chunk ids are `INTEGER PRIMARY KEY` autoincrement. Both must change before embeddings or +incremental rebuild land. + +Fix the contracts in Phase 1 (the skeleton), not later — they are cheap now and expensive +to retrofit once collections and embeddings exist on disk. 
+ +--- + +## Risks + +### QMD as the durable derived layer + +- **R1 — No format/version handle (blocker).** Prototype manifest has no `schema`, + `extractorVersion`, or `chunkPolicyVersion`. The acceptance criterion "the FTS index is + rebuildable" is unsatisfiable without one: you cannot tell which collections are stale + after the extractor or chunk policy changes, so every change forces a full global rebuild + or silent drift. This is the single highest-leverage fix. +- **R2 — "Durable" vs "rebuildable" are in tension for enrichment.** Mechanical QMD is + cheaply rederivable from source. Phase-3 LLM enrichment (summaries, decisions) is **not** — + it costs money and is non-deterministic. If the harness JSONL rotates or is deleted, the + enrichment becomes the *only* copy of real derived work. Treating all QMD as a "throwaway + projection" risks discarding expensive, unreproducible content. Classify per-document: + `origin: mechanical` (rebuildable) vs `origin: enrichment` (preserve; has provenance). +- **R3 — Non-deterministic content defeats change detection.** `buildManifest()` writes + `generatedAt: new Date().toISOString()` *into the content file*. Every re-extract produces + a different manifest even when nothing changed, so content-hash staleness checks and + git/diff inspection are both poisoned. Volatile fields (timestamps, timings, host) must be + segregated from the content identity hash. + +### Source refs + +- **R4 — Byte ranges into live JSONL are fragile.** `byteRange`/`recordRange` into an + append-only-but-growing transcript drift when the session is still live or the file is + rewritten. Drilldown can silently point at the wrong place. Record indices (`r.i`) are more + stable than byte offsets but only under deterministic parsing. Store a source anchor + (`sizeBytes` + `mtimeMs` + `contentHash`) so drilldown can detect "source changed since + indexed" and degrade to a soft match instead of mis-pointing. 
+- **R5 — Absolute paths are non-portable and leak layout.** Source refs in the prototype + carry raw `/Users/arach/...`. Even with cross-machine search out of scope, this breaks + inspectability/reproducibility and couples collections to one home dir. Store + `{ root: "<KNOWN_ROOT>", relPath }` against a small set of known roots (home, controlHome, repo + root) and resolve at read time. +- **R6 — Source ref type mismatch.** The doc's `KnowledgeSourceRef` is a typed tagged union; + the prototype index stores `source_ref` as a free-text column. Persist refs as structured + JSON and back them with the proposed `source_refs` table so facet filters and drilldown are + typed, not string-parsed. + +### Chunking + +- **R7 — No recorded chunk policy.** Two strategies coexist (350-record event windows; + H2-section split for markdown) with no `chunkPolicyVersion` on the chunk or document. When + policy changes you cannot identify stale chunks, and embeddings — which the doc says record + a chunking version — will silently mismatch the FTS chunks they were supposed to mirror. +- **R8 — Fixed modular windows cut semantic boundaries.** A 350-record window splits a tool + call from its result, or a question from its answer, hurting both snippet quality and + embedding quality. Prefer turn/exchange-aligned boundaries with a max-size cap; keep them + aligned to record indices so re-extraction stays reproducible. At minimum, add small + overlap or a carried context header per window. +- **R9 — H2-section chunks are unbounded.** One giant section becomes one huge chunk → weak + FTS snippets and over-limit embedding inputs. Add a `maxChars` cap with deterministic + sub-splitting and stable ordinals. +- **R10 — Chunk identity is unstable (blocker for embeddings).** `chunks.id INTEGER PRIMARY + KEY` autoincrement means ids churn on every reindex. 
Any embedding keyed on chunk id is + invalidated on every lexical rebuild even when the text is byte-identical — exactly the + expensive recompute embeddings are supposed to avoid. Use deterministic chunk ids + (`collectionId + documentPath + ordinal + chunkPolicyVersion`, or a hash of normalized + text). + +### FTS + +- **R11 — Tokenizer unspecified; default is wrong for code.** Default FTS5 tokenization + splits on `_ - . /`, so `snake_case`, `camelCase`, dotted identifiers, and file paths + tokenize poorly — bad for a developer-knowledge corpus. Choose explicitly: `unicode61` with + custom `tokenchars`, plus a trigram auxiliary index for substring/path search. Decide + stemming deliberately (porter helps prose, hurts exact symbol match) — a prose+code split + or trigram side index avoids picking one loser. +- **R12 — External-content FTS5 can desync.** `content='chunks'` is the right, space-efficient + choice, but bulk writes that bypass the triggers drift. Document the rebuild + (`INSERT INTO chunks_fts(chunks_fts) VALUES('rebuild')`) and surface an integrity check in + `scout search status`. +- **R13 — Ranking undefined.** "score" appears with no function. Specify `bm25()` with + per-column weights, and state that facets are SQL pre-filters, not relevance signals. + +### Embeddings + +- **R14 — Retrieval skew if FTS and vectors chunk differently.** Embeddings must consume the + *same canonical chunk text* FTS indexes, or lexical and semantic hits reference divergent + units. One canonical chunk record, two projections. +- **R15 — Model/dim migration needs coexistence.** Switching providers/models changes + dimensions. The embeddings table must key on `(chunk_id, provider, model, dim, version)` and + retain an `input_hash`, so you can (a) keep the old set live while building the new and + (b) detect when chunk text changed under a fixed model. One-vector-per-chunk will force + destructive rebuilds. 
+- **R16 — Vector store unstated.** `better-sqlite3` has no native vector search. At local + scale (<~100k chunks) brute-force cosine over blobs in `knowledge.sqlite` is fine — say so, + or commit to `sqlite-vec`. Keep vectors in the rebuildable index, never in + `control-plane.sqlite`. + +### Inspectability + +- **R17 — Prototype output is ephemeral.** QMD lands in `$TMPDIR/scout-study/qmd` and the + index in `$TMPDIR/.../index.db` — wiped on reboot. The doc's `controlHome/knowledge/qmd` is + right; flag the tmp→controlHome move as a real migration, and add `scout search status + --paths` (or `scout search where`) so the on-disk location is discoverable. +- **R18 — "Inspectable" requires a human entry point that the prototype doesn't emit.** The + doc's recommended shape includes `overview.md`, `source-refs.md`, `facets.md`; the + extractor emits only `manifest.json`, `files.md`, `tool-calls.md`, `events-NNN.md`. Make + `overview.md` mandatory and generated — a human opening a collection cold should understand + it without the DB. +- **R19 — Partial writes have no guard.** Files are written directly; a crash mid-build + leaves a half-written collection that the indexer will happily ingest. Add a manifest + `status: building | ready | failed` and write-temp-then-atomic-rename. +- **R20 — No GC / orphan policy.** Durable QMD + rebuildable index accumulates collections for + sessions that no longer exist or were superseded. Track `sourceState: live | complete | + gone` and expose orphan/stale reporting + a prune command in `status`. + +### Cross-cutting + +- **R21 — Evidence strength must be visible.** A hit citing an LLM-summarized "decision" is + weaker evidence than a raw transcript range. Record `origin`/`ownership` per document (not + just per hit) and let the UI/snippet distinguish mechanical extraction from inference. This + is both a trust and a data-ownership concern (the `derived` label in `data-ownership.md`). 
+- **R22 — Separate DB from day one (answers an open decision).** Churny FTS writes + WAL + + optional large vector blobs argue strongly against `control-plane.sqlite`. Keep + `knowledge.sqlite` separate immediately; the "start in control-plane" option in Open + Decisions is the higher-risk path. + +--- + +## Recommended manifest conventions + +A single versioned `manifest.json` per collection. Identity (hashable) is segregated from +volatile metadata. + +```jsonc +{ + "schema": "openscout.knowledge.collection/v1", // namespaced + versioned (R1) + "collectionId": "sessions/claude/<sessionId>", // <kind>/<id>, namespaced (R-naming) + "kind": "sessions", + "title": "Claude — refactor knowledge index (2026-05-26)", + + "generator": { // volatile: excluded from contentHash (R3) + "extractorVersion": "1.4.0", + "generatedAt": "2026-06-04T12:00:00Z", + "host": "arachs-mac-mini" // debug only + }, + + "source": { // structured, portable ref (R5, R6) + "kind": "harness_transcript", + "harness": "claude", + "ref": { "root": "CLAUDE_HOME", "relPath": "projects/<project>/<sessionId>.jsonl" }, + "sessionId": "<sessionId>", + "sizeBytes": 55512345, // source anchor for drift detection (R4) + "mtimeMs": 1748000000000, + "contentHash": "sha256:…", + "recordsScanned": 12009, + "sourceState": "complete" // live | complete | gone (R20) + }, + + "chunking": { // recorded policy, versioned (R7,R9) + "events": { "strategy": "record-window", "window": 350, "overlap": 0, "maxChars": 8000, "version": 2 }, + "markdown": { "strategy": "h2-section", "maxChars": 4000, "version": 1 } + }, + + "documents": [ // per-doc origin + provenance (R2,R21) + { "path": "overview.md", "kind": "overview", "origin": "mechanical", "bytes": 1024, "chunks": 1, "contentHash": "sha256:…" }, + { "path": "events-001.md", "kind": "events", "origin": "mechanical", "bytes": 40000, "chunks": 6, "contentHash": "sha256:…" }, + { "path": "decisions.md", "kind": "decisions", "origin": "enrichment", "bytes": 2000, "chunks": 3, "contentHash": "sha256:…", + "provenance": { 
"model": "claude-…", "promptVersion": "dec@3", "generatedAt": "…", "inputChunkIds": ["…"], "costTokens": 4200 } } + ], + + "facets": { "harness": "claude", "project": "openscout", "dateRange": ["2026-05-26", "2026-05-26"] }, + + "ownership": "derived", // scout_owned | derived | observed_source + "contentHash": "sha256:…", // over {source.contentHash, chunking, documents[].contentHash, versions} — NOT generatedAt (R3) + "status": "ready" // building | ready | failed (R19) +} +``` + +Rules: + +1. **`schema` is mandatory and versioned.** Bump on any breaking format change; the indexer + refuses or migrates unknown majors. +2. **`contentHash` excludes volatile fields.** Same source bytes + same versions → same hash, + so staleness is a cheap comparison and the manifest diffs cleanly in git. +3. **`origin` per document.** Mechanical = safe to delete and rebuild. Enrichment = carries + `provenance` and is treated as preservable derived content. +4. **Source refs are `{root, relPath}` + an anchor (`sizeBytes`/`mtimeMs`/`contentHash`)** — + never bare absolute paths, always enough to detect source drift. +5. **`status` + atomic rename** so half-built collections are never indexed. +6. **`sourceState`** drives GC/prune; `scout search status --paths` prints the on-disk root. + +## Recommended doc / collection conventions + +- **Directory = `<kind>/<id>`** so adapters (sessions, skills, mcp, codebase, + context-packs) share one namespace without id collisions. Matches the doc's `KnowledgeCollection.kind`. +- **Mandatory files:** `manifest.json` + `overview.md` (human entry point) + `source-refs.md` + (or fold refs into the manifest, but keep one of the two). Everything else is adapter + optional, as the doc's table already lays out. +- **Every markdown doc is self-describing.** Start each with an H1 and a `Source:` line citing + the canonical ref + record/line range, so a doc opened *outside* the DB is still traceable + back to authority. 
This is what makes "inspectable on disk" real rather than nominal. +- **Stable, zero-padded ordinals** (`events-001.md`) — already done; keep it, and apply the + same to sub-split markdown chunks. +- **`_`-prefixed files are internal/non-indexed.** The indexer already skips them + (`fileName.startsWith("_")`); promote that to a documented convention for enrichment + scratch (`_llm-call.json`, etc.). +- **Embed chunk boundaries in the QMD itself** (an HTML comment carrying the chunk's + `source_ref`) so the markdown remains the source of truth for chunking and the SQLite index + is fully regenerable from disk — the literal meaning of "rebuildable projection." +- **Deterministic chunk ids** (`collectionId + documentPath + ordinal + chunkPolicyVersion`) + so embeddings survive lexical reindex (R10). + +## Quick answers to Open Decisions + +- **`knowledge.sqlite` separate from control-plane?** Yes — from day one (R22). +- **QMD path?** `controlHome/knowledge/qmd/<kind>/<id>/` as proposed, namespaced by kind. +- **Default adapter after sessions?** Skills first (cheap, high agent value, small corpus), + then MCP/capabilities; codebase stays conservative/opt-in as the doc says. +- **First embedding backend?** A local provider (honors local-first + opt-in cost) behind the + DI interface, with remote as a configured option — not a hosted default. diff --git a/docs/eng/sco-062-qmd-knowledge-search-and-context-index.md b/docs/eng/sco-062-qmd-knowledge-search-and-context-index.md new file mode 100644 index 00000000..87569f60 --- /dev/null +++ b/docs/eng/sco-062-qmd-knowledge-search-and-context-index.md @@ -0,0 +1,845 @@ +# SCO-062: QMD Knowledge Search And Context Index + +## Status + +Proposed. + +## Proposal ID + +`sco-062` + +## Intent + +Define a Scout-native knowledge search and context index that uses QMD-style +derived documents as its durable spine, then builds lexical search, metadata +filters, optional embeddings, chat retrieval, and context-pack forking on top. 
+ +The goal is broader than session history search. Session transcripts are the +first source adapter because the current pain is finding prior conversations +without scanning large JSONL files. The same substrate should also support +skills, MCP/tool capabilities, codebase context, extension packs, and +Contextual-style reusable session templates. + +## Context + +The current session-search prototype already proved a useful pipeline: + +- parse Codex and Claude JSONL into normalized records +- emit QMD-style markdown sidecars such as `manifest.json`, `files.md`, + `tool-calls.md`, and `events-NNN.md` +- build a SQLite FTS5 index over chunks of those derived documents +- optionally enrich selected sessions with summaries and decisions + +See [`sco-059`](./sco-059-session-knowledge-search-exploration.md) for the +session-focused exploration and local sizing sample. That document remains the +best description of the immediate use case. + +The next step should not be a narrowly named `session-search` package. The +search surface is becoming a general way to navigate local developer knowledge: + +- Which recent session discussed a topic? +- Which skill should an agent load for this task? +- Which MCP server or tool exposes this capability? +- Which code files or docs matter for this API? +- Which reusable context pack should a new session fork from? + +These questions share a shape: discover source material, normalize it into +derived QMD documents, index the documents, retrieve chunks with stable source +refs, and drill back to authority when exact evidence matters. + +## Decision + +Scout SHOULD introduce a generic knowledge subsystem backed by QMD collections. + +This remains the right scope for `sco-062`; do not split the storage, API, and +indexing decisions into a second proposal. The needed work is to tighten this +proposal's contracts before implementation, not to create a new architecture. 
+ +The first implementation SHOULD live inside `packages/runtime` as a +broker-hosted internal subsystem, not as several new packages. In other words, +the broker process is the product owner and API boundary, while the runtime +package is the implementation home because it already contains the broker +daemon, support-path handling, tail discovery, and SQLite control-plane code. + +```text +packages/runtime/src/knowledge/ +|-- index.ts +|-- types.ts +|-- qmd/ +|-- query/ +|-- adapters/ +| |-- sessions.ts +| |-- skills.ts +| |-- mcp.ts +| |-- codebase.ts +| `-- context-packs.ts +|-- stores/ +| `-- sqlite.ts +`-- embeddings/ + `-- provider.ts +``` + +If the subsystem later needs to be reused outside runtime, it MAY be extracted +as one package, for example `@openscout/knowledge`. It SHOULD NOT split into +separate public packages for sessions, embeddings, QMD, skills, and context +packs in the first product phase. + +The first product slice SHOULD focus on session search only. Skills, MCP, +codebase docs, embeddings, saved searches, and context packs should remain +designed-for but not implemented until the QMD, source-ref, job, and API +contracts are stable. + +## Product Thesis + +Search should become a retrieval and launch layer for local agent work, not just +a log finder. + +The useful primitive is a searchable collection with source anchors and context +intent. A collection can represent a week of harness sessions, installed skills, +MCP tools, relevant repository docs, or a curated context pack. The user or an +agent should be able to search across those collections, inspect why a result +matched, and either drill into the source or launch/fork a new session from the +retrieved context. + +QMD is the durable derived knowledge layer. FTS, vector embeddings, facets, and +chat retrieval are rebuildable projections over QMD. Raw source material remains +the authority for exact evidence. 
+ +## Core Pipeline + +```text +source material + -> adapter normalization + -> QMD collection + -> lexical index + -> optional embedding index + -> search, chat, fork, and drilldown +``` + +### Source Material + +Source material can include: + +- Codex and Claude JSONL sessions +- Scout-owned messages, invocations, flights, work items, and checkpoints +- skills and slash-command definitions +- MCP server tool schemas and capability metadata +- project docs and selected codebase context +- extension packs and project overlays +- Contextual-style reusable session templates or context packs + +Each source remains owned by its native system. The knowledge subsystem reads, +references, and derives from it. + +### QMD Collection + +A QMD collection is a derived document directory with a manifest and one or more +markdown documents. A collection is durable and inspectable, but it is not the +canonical source for external systems. + +Recommended common shape: + +```text +collection/ +|-- manifest.json +|-- overview.md +|-- source-refs.md +|-- facets.md +`-- chunks/ + |-- chunk-001.md + `-- chunk-002.md +``` + +Adapters MAY add domain-specific documents: + +| Adapter | Example QMD documents | +| --- | --- | +| sessions | `events-NNN.md`, `files.md`, `tool-calls.md`, `decisions.md` | +| skills | `capability.md`, `triggers.md`, `examples.md` | +| MCP/tools | `tools.md`, `schemas.md`, `use-cases.md` | +| codebase | `docs.md`, `symbols.md`, `files.md`, `routes.md` | +| context packs | `purpose.md`, `included-context.md`, `fork-policy.md` | + +### QMD V1 Contract + +The first implementation MUST define a small versioned QMD contract before +porting the prototype extractor. This is the root-cause fix for later rebuild, +embedding, and drilldown problems. 
+ +Each collection manifest MUST include: + +- `schema`, for example `openscout.knowledge.collection/v1` +- `collectionId` in a namespaced form such as `<kind>/<id>` +- `kind`, `title`, `createdAt`, and `updatedAt` +- generator metadata, including `extractorVersion` +- source metadata with structured refs and source anchors +- chunking metadata, including `chunkPolicyVersion` +- document inventory with `origin` and `contentHash` +- collection `contentHash` that excludes volatile fields such as generated time +- collection `status`: `building`, `ready`, or `failed` +- ownership/provenance labels + +QMD writes MUST be atomic enough that partially built collections are not +indexed. Build into a temporary directory or `status: building`, then atomically +promote to `ready` only after all documents and manifest content are complete. + +Chunk ids MUST be deterministic. Do not use SQLite autoincrement ids as the +stable identity for a chunk. A valid first policy is: + +```text +sha256(collectionId + documentPath + ordinal + chunkPolicyVersion + normalizedTextHash) +``` + +This keeps FTS rebuilds, embeddings, inspect routes, and saved references stable +when byte-identical chunks survive a reindex. + +Markdown documents SHOULD be self-describing when opened outside Scout: + +- begin with an H1 +- include a short source/provenance line +- keep stable zero-padded ordinals for generated event windows +- use `_`-prefixed files for non-indexed scratch/internal material + +Mechanical extraction and LLM enrichment are different document origins: + +- `mechanical` documents are cheaply rebuildable from source material +- `enrichment` documents are derived Scout-owned work products with model, + prompt, input chunk, generated-at, and cost provenance + +Search results must surface this difference so a user can tell raw mechanical +evidence from inferred summaries or decisions. + +### Lexical Index + +SQLite FTS5 SHOULD be the default first search engine. 
+ +It is local-first, cheap to build, good enough for exact/fuzzy lookup, and easy +to rebuild from QMD. The first version should optimize for usefulness before +embedding complexity: + +- exact phrase and fuzzy-ish topic lookup +- facet filters by source type, project, harness, skill, provider, file path, + date range, and freshness +- snippets and source refs +- sub-100ms query latency over common local corpora + +The FTS table SHOULD use an explicit tokenizer and ranking policy. SQLite's +default FTS5 tokenizer settings are a poor fit for developer text because paths, +dotted names, snake_case, and dashed identifiers matter. The first implementation +SHOULD configure `unicode61` deliberately (for example its `tokenchars` and +`separators` options), document token characters/stemming choices, and +add a trigram or equivalent side path if substring/path search is needed. Ranking +SHOULD be expressed as `bm25()` with documented column weights; facets should be +SQL filters, not implicit relevance signals. + +### Embedding Index + +Embeddings SHOULD be designed into the pipeline but remain optional. + +The embedding layer should: + +- embed selected QMD chunks, not raw source files +- record provider, model, dimensions, chunking version, and generated-at time +- support rebuild when provider, model, or chunk policy changes +- allow local or remote embedding providers through dependency injection +- stay disabled by default until the user enables semantic search + +Embeddings are valuable for vague recall and cross-vocabulary matching, but they +should not block the first useful product slice. 
+ +### Retrieval And Drilldown + +Every search hit SHOULD include: + +- collection id +- source type +- source ref +- chunk id and score +- snippet +- freshness metadata +- ownership label: `scout_owned`, `derived`, or `observed_source` +- drilldown target, such as transcript path plus record range, skill path, + MCP schema path, code file path, or context-pack manifest + +The user should be able to ask a conversational question over the derived +corpus, but answers should cite QMD chunks and preserve raw drilldown for +confidence-sensitive cases. + +## Data Ownership + +This proposal preserves the existing Scout boundary from +[`data-ownership.md`](../data-ownership.md). + +Separate three classes explicitly: + +1. **Canonical Scout-owned records.** User-created saved searches, curated + context packs, indexing schedules, and explicit knowledge preferences are + broker-owned product records when they exist. They require broker write APIs + and migrations like other Scout-owned records. +2. **Durable derived knowledge.** QMD documents, chunks, source refs, facets, + and enrichments are Scout-generated derived artifacts. They are durable and + inspectable, but they are not the authority for external source material. +3. **Rebuildable projections.** FTS rows, vector rows, rank metadata, and most + job scratch state are projections over QMD and source refs. + +Scout owns or controls: + +- knowledge collection metadata +- generated QMD documents +- chunk records +- FTS/vector projections +- source references +- user-created saved searches and curated context packs +- derived summaries, labels, facets, and enrichment records + +Scout observes or references: + +- harness transcript JSONL +- skill source files +- MCP schemas and tool metadata +- codebase files and docs +- extension pack contents +- external Contextual assets + +The subsystem MUST NOT bulk-import external harness turns as Scout messages. 
+It MUST NOT treat observed source material as broker-owned conversation state. +It also MUST NOT use QMD as a disguised full transcript warehouse. Session QMD +should be bounded, chunked, provenance-rich derived material for search and +drilldown; exact evidence remains the raw source ref unless a workflow +explicitly creates a Scout-owned summary, decision, note, or context pack. + +## Storage Model + +The first implementation SHOULD use the OpenScout support directory and keep +knowledge storage separate from canonical control-plane tables unless a table is +explicitly broker-owned metadata. + +Recommended shape: + +```text +$OPENSCOUT_CONTROL_HOME/ +|-- control-plane.sqlite +`-- knowledge/ + |-- qmd/ + | `-- <kind>/ + | `-- <collection-id>/ + `-- knowledge.sqlite +``` + +`knowledge.sqlite` is a rebuildable index and metadata store. It should not +become a second canonical broker database. + +Keep `knowledge.sqlite` separate from `control-plane.sqlite` from day one. FTS +churn, optional vector blobs, rebuilds, and WAL behavior should not compete with +the canonical broker coordination database. If future saved searches, schedules, +or curated context packs become durable first-party product records, add them as +explicit broker-owned metadata rather than smuggling them into rebuildable index +tables. + +Runtime should expose support-path helpers for: + +- `knowledgeRoot` +- `knowledgeQmdRoot` +- `knowledgeSqlitePath` +- per-collection QMD paths + +Initial session-search tables: + +- `collections` +- `documents` +- `chunks` +- `chunks_fts` +- `facets` +- `source_refs` +- `index_jobs` + +The `embeddings` table is deferred until semantic search is enabled. When it is +added, it should key by `(chunk_id, provider, model, dimensions, +chunk_policy_version, input_hash)` so model migrations and lexical rebuilds do +not invalidate byte-identical chunks unnecessarily. 
+ +## Broker-Hosted Runtime Boundary + +The subsystem lives in `packages/runtime`, but the broker process owns the +product boundary. That means: + +- broker APIs admit index/query/inspect requests +- broker configuration and feature flags decide what can run +- broker-owned job state records status, leases, progress, cancellation, and + failures +- runtime implementation code performs source discovery, QMD extraction, + indexing, cache reads, and rebuilds +- permission and data-ownership checks happen before source reads, drilldown, or + launch/fork actions + +Indexing MUST NOT run as ambient background work by default. It should run only +from an explicit user action, broker config, or an explicit schedule. + +Job execution should use broker-style durable semantics: a job has an id, state, +lease owner, lease generation, progress counters, checkpoints where useful, and +terminal state. Stale workers must not be able to overwrite a newer lease's +terminal result. + +The web UI should not import indexer code directly. It should call broker APIs. + +The CLI should expose the same broker-backed operations: + +```bash +scout search status +scout search index --source sessions --days 7 +scout search index --source skills +scout search query "raw log drilldown" +scout search inspect <hit-or-chunk-id> +``` + +## API Sketch + +Broker HTTP routes should exist before web/CLI wiring: + +| Route | Meaning | +| --- | --- | +| `GET /v1/knowledge/status` | index paths, sizes, collection counts, stale/orphan counts, active jobs | +| `POST /v1/knowledge/index` | enqueue or run an indexing job and return a job receipt | +| `GET /v1/knowledge/jobs/:jobId` | inspect job state/progress/failure | +| `POST /v1/knowledge/query` | query compact hits from the built index | +| `GET /v1/knowledge/inspect/:hitOrChunkId` | resolve QMD preview and typed raw drilldown | + +The CLI and web server should relay through these broker routes. 
`inspect` can +return a local file/path drilldown for a trusted local surface, but the broker +should not blindly open arbitrary paths as a side effect of search. + +```ts +export interface KnowledgeCollection { + id: string; + kind: "sessions" | "skills" | "mcp" | "codebase" | "context_pack" | "mixed"; + title: string; + sourceRefs: KnowledgeSourceRef[]; + qmdPath: string; + status: "building" | "ready" | "failed"; + contentHash: string; + extractorVersion: string; + chunkPolicyVersion: string; + createdAt: number; + updatedAt: number; + facets: Record<string, string>; +} +``` + +```ts +export interface KnowledgeChunk { + id: string; + collectionId: string; + documentId: string; + documentPath: string; + ordinal: number; + text: string; + textHash: string; + origin: "mechanical" | "enrichment"; + ownership: "scout_owned" | "derived" | "observed_source"; + sourceRefs: KnowledgeSourceRef[]; + facets: Record<string, string>; +} +``` + +```ts +export interface KnowledgeSourceAnchor { + sizeBytes?: number; + mtimeMs?: number; + contentHash?: string; +} + +export interface KnowledgePortablePath { + root: "HOME" | "OPENSCOUT_CONTROL_HOME" | "OPENSCOUT_SUPPORT_DIRECTORY" | "PROJECT_ROOT" | "ABSOLUTE"; + relPath: string; +} + +export type KnowledgeSourceRef = + | { kind: "harness_transcript"; harness: string; path: KnowledgePortablePath; sessionId?: string; recordRange?: [number, number]; byteRange?: [number, number]; anchor?: KnowledgeSourceAnchor } + | { kind: "scout_record"; recordKind: string; id: string } + | { kind: "skill"; path: KnowledgePortablePath; skillName?: string; anchor?: KnowledgeSourceAnchor } + | { kind: "mcp_tool"; serverId: string; toolName: string; schemaPath?: string } + | { kind: "file"; path: KnowledgePortablePath; lineRange?: [number, number]; anchor?: KnowledgeSourceAnchor } + | { kind: "context_pack"; path: KnowledgePortablePath; packId?: string; schemaVersion?: string; anchor?: KnowledgeSourceAnchor }; +``` + +```ts +export interface KnowledgeSearchQuery { + q: string; + 
collections?: string[]; + sourceKinds?: KnowledgeCollection["kind"][]; + facets?: Record<string, string>; + limit?: number; + mode?: "lexical" | "semantic" | "hybrid"; +} +``` + +```ts +export type KnowledgeDrilldown = + | { kind: "qmd"; collectionId: string; documentPath: string; chunkId?: string } + | { kind: "harness_transcript"; sourceRef: Extract<KnowledgeSourceRef, { kind: "harness_transcript" }> } + | { kind: "file"; sourceRef: Extract<KnowledgeSourceRef, { kind: "file" }> } + | { kind: "scout_record"; sourceRef: Extract<KnowledgeSourceRef, { kind: "scout_record" }> } + | { kind: "mcp_tool"; sourceRef: Extract<KnowledgeSourceRef, { kind: "mcp_tool" }> }; + +export interface KnowledgeSearchHit { + id: string; + collectionId: string; + documentId: string; + chunkId: string; + title: string; + snippet: string; + score: number; + scoreSource: "fts" | "vector" | "hybrid"; + origin: "mechanical" | "enrichment"; + ownership: "scout_owned" | "derived" | "observed_source"; + freshness: "fresh" | "stale" | "source_missing" | "unknown"; + sourceRefs: KnowledgeSourceRef[]; + drilldown: KnowledgeDrilldown[]; + facets: Record<string, string>; +} +``` + +```ts +export interface KnowledgeIndexRequest { + source: "sessions" | "skills" | "mcp" | "codebase" | "context_pack"; + days?: number; + collections?: string[]; + force?: boolean; + mode?: "foreground" | "background"; +} + +export interface KnowledgeIndexJob { + id: string; + source: KnowledgeIndexRequest["source"]; + state: "queued" | "running" | "waiting" | "completed" | "failed" | "cancelled"; + leaseOwner?: string; + leaseGeneration: number; + progress: { + discovered?: number; + extracted?: number; + indexed?: number; + failed?: number; + }; + createdAt: number; + updatedAt: number; + completedAt?: number; + error?: string; +} +``` + +## Source Adapters + +### Sessions + +The sessions adapter is the first implementation target. 
+ +Inputs: + +- `getTailDiscovery()` transcript inventory +- Codex and Claude JSONL files +- optional Scout-owned invocation/message refs + +Outputs: + +- session-level QMD manifest +- event window markdown +- file path catalog +- tool-call catalog +- generated `overview.md` +- optional decisions enrichment after the first mechanical slice +- structured source refs with portable transcript path, harness, session id, + record range, byte offsets where available, and source anchors for drift + detection + +The first sessions slice should index only mechanical extraction. LLM-generated +overview/decisions enrichment should be added later as opt-in job work with +provenance. + +### Skills + +The skills adapter should index installed skills and project-local skill +exports from extension packs. + +Outputs: + +- capability summary +- trigger phrases +- usage examples +- source refs to `SKILL.md` +- facets for skill name, provider, project applicability, and activation terms + +This lets agents ask "is there a skill for X?" through the same search surface +instead of hardcoding a separate skill finder. + +### MCP And Capabilities + +The MCP adapter should project tool schemas into QMD and align with +[`sco-040`](./sco-040-capability-registry-and-tool-boundaries.md). + +Outputs: + +- server summary +- tool list +- input/output schema summaries +- effect and enforcement metadata +- source refs to MCP server/tool records + +Search results should distinguish "this tool exists" from "this actor may use +this tool now." Permission evaluation remains broker/capability-registry work. + +### Codebase + +The codebase adapter should be conservative in v1. + +It SHOULD index docs and explicit context roots first, not the entire repository +by default. Code symbol extraction can follow once file selection and freshness +policies are stable. 
+ +Potential sources: + +- `README.md` +- `docs/` +- `llms.txt` +- selected package READMEs +- generated agent docs +- explicit user-selected files or globs + +### Context Packs + +Context packs represent forkable prepared sessions or reusable work states. +They are related to [`sco-049`](./sco-049-session-forking-and-excellent-session-states.md), +but higher level. + +A context pack can describe: + +- purpose +- intended task class +- recommended model/profile/harness +- included docs and source refs +- required skills +- useful MCP capabilities +- prompt fragments +- workspace assumptions +- permission posture +- fork policy + +Search should make context packs discoverable, then runtime can launch or fork a +session from the chosen pack. + +## Performance And Resource Policy + +Idle overhead SHOULD be near zero. + +The subsystem should not scan or embed in the background unless the user, broker +configuration, or an explicit schedule enables it. Indexing should be resumable +and observable. 
+ +Initial budgets for a heavy local week from `sco-059`: + +| Stage | Input | Target | +| --- | --- | --- | +| Inventory | hundreds of files | 1-5s | +| Mechanical QMD extraction | hundreds of MiB JSONL | minutes, streamed | +| FTS index | tens of MiB QMD | minutes | +| First useful query | local SQLite index | under 100ms | +| LLM enrichment | selected chunks | async and optional | +| Embeddings | selected chunks | opt-in and resumable | + +Memory policy: + +- stream source files +- bound per-file buffers +- do not load large JSONL files into memory +- batch SQLite writes +- chunk before enrichment or embedding + +Disk policy: + +- QMD collections and indexes are rebuildable +- raw source is not copied +- embedding storage is optional +- expose index size in `scout search status` + +## User Surfaces + +### Web + +The existing Search screen should evolve from a static concept view to an +operational surface: + +- index status +- source toggles +- build/refresh controls +- query box +- facet filters +- ranked results +- QMD preview +- raw drilldown +- context-pack fork action + +### CLI + +The CLI should make the subsystem scriptable and agent-friendly: + +```bash +scout search status --json +scout search index --source sessions --days 7 +scout search query "which conversation discussed QMD embeddings?" --json +scout search inspect <hit-or-chunk-id> +``` + +### MCP + +Scout MCP should eventually expose: + +- `knowledge_search` +- `knowledge_status` +- `knowledge_index` +- `knowledge_inspect` +- `context_pack_search` +- `context_pack_fork` + +Those tools should return compact hits and source refs, not bulk QMD documents +by default. 
+ +## Scope + +In scope: + +- generic knowledge subsystem inside runtime +- QMD collection format and manifest shape +- session transcript adapter as the first source +- FTS5 lexical index +- source refs and raw drilldown +- search status and query APIs +- web Search surface wired to real broker data +- design for optional embeddings +- follow-on adapters for skills, MCP/capabilities, codebase docs, and context + packs +- resource budgets and opt-in background policy + +Out of scope for the first slice: + +- saved searches and canonical user-created context packs +- skills, MCP/capability, codebase, and context-pack adapters +- LLM enrichment by default +- publishing multiple new npm packages +- indexing every file on disk by default +- embedding all historical chunks automatically +- importing external transcript turns as Scout messages +- replacing MCP permission evaluation +- enterprise audit/compliance guarantees +- cross-machine replicated search indexes +- perfect semantic answer generation over all local history + +## Implementation Plan + +### Phase 0: Proposal And Team Alignment + +1. Accept this proposal as the broad architecture target after adding the + broker/runtime, storage, job, and QMD contract clarifications above. +2. Keep `sco-059` as the session-search evidence and sizing appendix. +3. Treat first implementation as session search only. + +### Phase 1: Internal Knowledge Skeleton + +1. Add `packages/runtime/src/knowledge`. +2. Add support-path helpers for `controlHome/knowledge`, QMD root, and + `knowledge.sqlite`. +3. Define collection, manifest, document, chunk, source-ref, query, hit, + inspect, status, and job types. +4. Define the QMD v1 manifest contract, deterministic chunk ids, portable source + refs, and atomic write policy. +5. Add `KnowledgeStore` over separate `knowledge.sqlite` with session-search + tables. +6. Add tests with small Codex and Claude fixtures. +7. 
Keep the existing Studio commands either as thin callers or delete them after + product wiring lands. + +### Phase 2: Session Search Product Slice + +1. Discover recent transcript files through the runtime tail inventory. +2. Build QMD collections for selected sessions. +3. Build `knowledge.sqlite` with FTS5 chunks and source refs. +4. Add broker APIs for status, index, query, and inspect. +5. Wire `KnowledgeSearchScreen` to real data. +6. Support raw drilldown to existing session/tail views. +7. Add CLI commands for `scout search status`, `index`, `query`, and `inspect`. + +### Phase 3: Optional Enrichment And Embeddings + +1. Add async enrichment jobs over selected QMD chunks. +2. Store derived overview, decisions, files, problems, and next-action docs. +3. Add embedding provider interface. +4. Add opt-in semantic and hybrid search. +5. Show cost, freshness, and provider metadata in status. + +### Phase 4: Skills And MCP Adapters + +1. Index installed skills as QMD collections. +2. Index MCP/capability registry metadata. +3. Add search facets for skill triggers, capability provider, effect, and + enforcement. +4. Let agents use the search API to discover relevant skills/tools before + asking the user or scanning files. + +### Phase 5: Context Packs + +1. Define context-pack QMD/manifest shape. +2. Index curated context packs. +3. Add fork/launch actions that bridge to session policy from `sco-049`. +4. Support Contextual-style prepared sessions as an explicit source adapter + without making Contextual a hard dependency. 
+ +## Workstreams + +| Workstream | Owner role | Output | +| --- | --- | --- | +| Architecture | runtime/broker reviewer | Confirm storage, package boundary, APIs, and data ownership | +| Session adapter | indexing implementer | Port parser/QMD/FTS prototype into runtime with fixtures | +| Product surface | web/UI implementer | Turn Search screen into operational index/query surface | +| Context packs | Contextual/product reviewer | Align reusable session templates with QMD collection model | +| Skills and MCP | capability reviewer | Align skills/tool search with capability registry and pack model | + +## Acceptance Criteria + +- The first product slice can answer "which recent session was about this + topic?" without scanning raw JSONL at query time. +- Session hits include stable QMD chunks and raw transcript source refs. +- Chunk ids are deterministic across byte-identical rebuilds. +- Source refs are structured, portable, and include enough anchor metadata to + detect stale or missing raw sources. +- QMD collections are inspectable on disk. +- QMD writes are atomic enough that partial collections are not indexed. +- QMD manifests carry schema, extractor, content-hash, and chunk-policy + versions. +- The FTS index is rebuildable and does not become canonical control-plane + state. +- Broker APIs exist for status, index, job inspection, query, and inspect. +- Index jobs expose state/progress and protect terminal writes with lease + generation or equivalent stale-worker checks. +- Idle overhead is effectively zero when indexing is not running. +- Embeddings are possible through the same chunk model but disabled by default. +- The architecture can add skills, MCP, codebase docs, and context packs without + creating multiple new public packages. +- The web Search surface reads broker APIs, not filesystem/index internals. + +## Decisions Taken For First Implementation + +- Keep this as `sco-062`; revise in place rather than writing a new proposal. 
+- Keep first implementation inside `packages/runtime` as a broker-hosted + subsystem. +- Store QMD under `$OPENSCOUT_CONTROL_HOME/knowledge/qmd/<kind>/<collection-id>/`. +- Keep `knowledge.sqlite` separate from `control-plane.sqlite` from day one. +- Ship session search first; no default skills/MCP/codebase/context-pack + indexing in the first slice. +- Keep embeddings disabled by default and behind dependency-injected providers. + +## Remaining Open Decisions + +- Which source adapters are enabled by default after sessions: skills only, or + skills plus MCP/capabilities? +- What is the minimum manifest schema needed before context packs can launch + forked sessions? +- Which embedding provider should be the first supported opt-in backend? +- Should skill and MCP search be exposed as separate UI filters or a unified + "capabilities" source group? diff --git a/packages/runtime/package.json b/packages/runtime/package.json index f8f54f20..7cdb3c02 100644 --- a/packages/runtime/package.json +++ b/packages/runtime/package.json @@ -152,6 +152,11 @@ "types": "./src/conversations/legacy-ids.ts", "bun": "./src/conversations/legacy-ids.ts", "default": "./dist/conversations/legacy-ids.js" + }, + "./knowledge": { + "types": "./src/knowledge/index.ts", + "bun": "./src/knowledge/index.ts", + "default": "./dist/knowledge/index.js" } }, "types": "./src/index.ts", diff --git a/packages/runtime/src/index.ts b/packages/runtime/src/index.ts index 7d54e539..119c94e3 100644 --- a/packages/runtime/src/index.ts +++ b/packages/runtime/src/index.ts @@ -37,3 +37,4 @@ export * from "./issue-runner-service.js"; export * from "./session-attention.js"; export * from "./vantage-plan.js"; export * from "./conversations/index.js"; +export * from "./knowledge/index.js"; diff --git a/packages/runtime/src/knowledge.test.ts b/packages/runtime/src/knowledge.test.ts new file mode 100644 index 00000000..bd591f4b --- /dev/null +++ b/packages/runtime/src/knowledge.test.ts @@ -0,0 +1,179 @@ +import { afterEach, describe, expect, 
test } from "bun:test"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { + deterministicKnowledgeChunkId, + knowledgeCollectionQmdPath, + resolveOpenScoutKnowledgePaths, + SQLiteKnowledgeStore, + type KnowledgeCollection, + type KnowledgeDocument, + type KnowledgeSourceRef, +} from "./knowledge/index.ts"; + +const roots = new Set(); +const originalControlHome = process.env.OPENSCOUT_CONTROL_HOME; +const originalSupportDirectory = process.env.OPENSCOUT_SUPPORT_DIRECTORY; + +afterEach(() => { + for (const root of roots) { + rmSync(root, { recursive: true, force: true }); + } + roots.clear(); + if (originalControlHome === undefined) delete process.env.OPENSCOUT_CONTROL_HOME; + else process.env.OPENSCOUT_CONTROL_HOME = originalControlHome; + if (originalSupportDirectory === undefined) delete process.env.OPENSCOUT_SUPPORT_DIRECTORY; + else process.env.OPENSCOUT_SUPPORT_DIRECTORY = originalSupportDirectory; +}); + +function tempRoot(prefix: string): string { + const root = mkdtempSync(join(tmpdir(), prefix)); + roots.add(root); + return root; +} + +function useTempSupportPaths(): ReturnType { + const root = tempRoot("openscout-knowledge-"); + process.env.OPENSCOUT_CONTROL_HOME = join(root, "control-plane"); + process.env.OPENSCOUT_SUPPORT_DIRECTORY = join(root, "support"); + return resolveOpenScoutKnowledgePaths(); +} + +function sourceRef(): KnowledgeSourceRef { + return { + kind: "harness_transcript", + harness: "codex", + path: { + root: "HOME", + relPath: ".codex/sessions/2026/06/session.jsonl", + }, + sessionId: "session-1", + recordRange: [1, 12], + anchor: { + sizeBytes: 1234, + mtimeMs: 1780000000000, + contentHash: "sha256:source", + }, + }; +} + +function collection(paths: ReturnType): KnowledgeCollection { + return { + id: "sessions/codex/session-1", + kind: "sessions", + title: "Codex session 1", + sourceRefs: [sourceRef()], + qmdPath: join(paths.qmdRoot, "sessions", "codex", 
"session-1"), + status: "ready", + contentHash: "sha256:collection", + extractorVersion: "test-extractor-v1", + chunkPolicyVersion: "test-policy-v1", + createdAt: 1780000000000, + updatedAt: 1780000000001, + facets: { + harness: "codex", + project: "openscout", + }, + }; +} + +function document(collectionId: string): KnowledgeDocument { + return { + id: "doc-session-1-overview", + collectionId, + path: "overview.md", + kind: "overview", + origin: "mechanical", + contentHash: "sha256:document", + }; +} + +describe("knowledge paths", () => { + test("resolve under OPENSCOUT_CONTROL_HOME and reject escaping collection ids", () => { + const paths = useTempSupportPaths(); + expect(paths.knowledgeRoot).toEndWith("control-plane/knowledge"); + expect(paths.qmdRoot).toEndWith("control-plane/knowledge/qmd"); + expect(paths.sqlitePath).toEndWith("control-plane/knowledge/knowledge.sqlite"); + + const collectionPath = knowledgeCollectionQmdPath("sessions/codex/session-1"); + expect(collectionPath).toBe(join(paths.qmdRoot, "sessions", "codex", "session-1")); + expect(() => knowledgeCollectionQmdPath("sessions/../escape")).toThrow("invalid collectionId segment"); + }); +}); + +describe("SQLiteKnowledgeStore", () => { + test("stores collections, stable chunks, lexical search hits, and job status", () => { + const paths = useTempSupportPaths(); + const store = new SQLiteKnowledgeStore(undefined, paths); + try { + const storedCollection = collection(paths); + const storedDocument = document(storedCollection.id); + store.upsertCollection(storedCollection); + store.upsertDocument(storedDocument); + + const text = "This session discussed QMD knowledge indexing, broker APIs, and raw transcript drilldown."; + const chunkId = deterministicKnowledgeChunkId({ + collectionId: storedCollection.id, + documentPath: storedDocument.path, + ordinal: 1, + chunkPolicyVersion: storedCollection.chunkPolicyVersion, + text, + }); + const chunkIdAgain = deterministicKnowledgeChunkId({ + collectionId: 
storedCollection.id, + documentPath: storedDocument.path, + ordinal: 1, + chunkPolicyVersion: storedCollection.chunkPolicyVersion, + text, + }); + expect(chunkIdAgain).toBe(chunkId); + + store.upsertChunk({ + id: chunkId, + collectionId: storedCollection.id, + documentId: storedDocument.id, + documentPath: storedDocument.path, + ordinal: 1, + text, + textHash: "sha256:text", + origin: "mechanical", + ownership: "derived", + sourceRefs: [sourceRef()], + facets: { + harness: "codex", + project: "openscout", + }, + }); + + const hits = store.searchLexical({ q: "QMD", limit: 5 }); + expect(hits).toHaveLength(1); + expect(hits[0]?.chunkId).toBe(chunkId); + expect(hits[0]?.origin).toBe("mechanical"); + expect(hits[0]?.ownership).toBe("derived"); + expect(hits[0]?.drilldown.map((entry) => entry.kind)).toContain("qmd"); + expect(hits[0]?.drilldown.map((entry) => entry.kind)).toContain("harness_transcript"); + + const job = store.createIndexJob({ source: "sessions", days: 7 }); + const running = store.updateIndexJob({ + id: job.id, + state: "running", + leaseOwner: "test-worker", + leaseGeneration: 1, + progress: { discovered: 1, extracted: 1 }, + }); + expect(running?.state).toBe("running"); + expect(running?.leaseGeneration).toBe(1); + + const status = store.status(); + expect(status.collections).toBe(1); + expect(status.readyCollections).toBe(1); + expect(status.chunks).toBe(1); + expect(status.activeJobs.map((activeJob) => activeJob.id)).toContain(job.id); + expect(status.paths.sqlitePath).toBe(paths.sqlitePath); + } finally { + store.close(); + } + }); +}); diff --git a/packages/runtime/src/knowledge/index.ts b/packages/runtime/src/knowledge/index.ts new file mode 100644 index 00000000..15cf33f4 --- /dev/null +++ b/packages/runtime/src/knowledge/index.ts @@ -0,0 +1,4 @@ +export * from "./types.js"; +export * from "./paths.js"; +export * from "./store.js"; +export * from "./session-indexer.js"; diff --git a/packages/runtime/src/knowledge/paths.ts 
b/packages/runtime/src/knowledge/paths.ts new file mode 100644 index 00000000..a2469672 --- /dev/null +++ b/packages/runtime/src/knowledge/paths.ts @@ -0,0 +1,48 @@ +import { join, normalize, sep } from "node:path"; + +import { resolveOpenScoutSupportPaths } from "../support-paths.js"; + +export interface OpenScoutKnowledgePaths { + knowledgeRoot: string; + qmdRoot: string; + sqlitePath: string; +} + +export function resolveOpenScoutKnowledgePaths(): OpenScoutKnowledgePaths { + const supportPaths = resolveOpenScoutSupportPaths(); + return { + knowledgeRoot: supportPaths.knowledgeDirectory, + qmdRoot: supportPaths.knowledgeQmdDirectory, + sqlitePath: supportPaths.knowledgeSqlitePath, + }; +} + +function safeCollectionSegments(collectionId: string): string[] { + const segments = collectionId + .split(/[\\/]+/) + .map((segment) => segment.trim()) + .filter((segment) => segment.length > 0); + + if (segments.length === 0) { + throw new Error("collectionId must contain at least one path segment"); + } + + for (const segment of segments) { + if (segment === "." || segment === ".." 
|| segment.includes("\0")) { + throw new Error(`invalid collectionId segment: ${segment}`); + } + } + + return segments; +} + +export function knowledgeCollectionQmdPath(collectionId: string): string { + const paths = resolveOpenScoutKnowledgePaths(); + const out = join(paths.qmdRoot, ...safeCollectionSegments(collectionId)); + const normalizedRoot = normalize(paths.qmdRoot); + const normalizedOut = normalize(out); + if (normalizedOut !== normalizedRoot && !normalizedOut.startsWith(`${normalizedRoot}${sep}`)) { + throw new Error(`collectionId escapes QMD root: ${collectionId}`); + } + return out; +} diff --git a/packages/runtime/src/knowledge/session-indexer.ts b/packages/runtime/src/knowledge/session-indexer.ts new file mode 100644 index 00000000..df5dd901 --- /dev/null +++ b/packages/runtime/src/knowledge/session-indexer.ts @@ -0,0 +1,984 @@ +import { createHash } from "node:crypto"; +import { + createReadStream, + existsSync, + mkdirSync, + readdirSync, + renameSync, + rmSync, + statSync, + writeFileSync, +} from "node:fs"; +import { homedir } from "node:os"; +import { basename, join, relative, sep } from "node:path"; + +import { + deterministicKnowledgeChunkId, + SQLiteKnowledgeStore, +} from "./store.js"; +import { knowledgeCollectionQmdPath, resolveOpenScoutKnowledgePaths } from "./paths.js"; +import type { + KnowledgeChunk, + KnowledgeCollection, + KnowledgeDocument, + KnowledgeFacets, + KnowledgeIndexJob, + KnowledgePortablePath, + KnowledgeSourceRef, +} from "./types.js"; + +export interface IndexRecentSessionKnowledgeInput { + days?: number; + limit?: number; + force?: boolean; +} + +export interface IndexedSessionKnowledgeSummary { + collectionId: string; + title: string; + harness: string; + project: string; + transcriptPath: string; + qmdPath: string; + records: number; + documents: number; + chunks: number; + bytes: number; + mtimeMs: number; + skipped?: boolean; + error?: string; +} + +export interface IndexRecentSessionKnowledgeResult { + job: 
KnowledgeIndexJob; + days: number; + discovered: number; + indexed: number; + failed: number; + sessions: IndexedSessionKnowledgeSummary[]; +} + +type Harness = "codex" | "claude"; + +type SessionFile = { + harness: Harness; + path: string; + mtimeMs: number; + size: number; +}; + +type NormalizedKind = + | "session_meta" + | "user_turn" + | "assistant_turn" + | "command_or_tool" + | "observation" + | "system_record" + | "unknown"; + +type NormalizedRecord = { + i: number; + ts?: string; + kind: NormalizedKind; + tag?: string; + text?: string; + tool?: { name: string; input: unknown }; + result?: { ok?: boolean; output: unknown }; + meta?: Record; + refs?: { id?: string; parentId?: string; sessionId?: string }; + sourceType: string; + sourceOffset: number; +}; + +type ParseResult = { + harness: Harness; + records: NormalizedRecord[]; + scannedLines: number; + bytesRead: number; + contentHash: string; + cwd: string | null; + sessionId: string | null; +}; + +type ExtractedDocument = { + path: string; + kind: string; + content: string; + sourceRef: KnowledgeSourceRef; +}; + +// Version tags recorded on each collection; both feed the collection content hash. +const EXTRACTOR_VERSION = "session-qmd-v1"; +const CHUNK_POLICY_VERSION = "session-qmd-record-window-v1"; +// Records per events-NNN.md document, and records per chunk within one. +const EVENT_WINDOW_RECORDS = 350; +const EVENT_CHUNK_RECORDS = 50; +// Defaults for the days and limit inputs of indexRecentSessionKnowledge. +const DEFAULT_DAYS = 3; +const DEFAULT_LIMIT = 220; + +/** + * Coerce an optional numeric option to a whole number no greater than max. + * + * Returns fallback when value is not a number, is not finite, or floors to + * less than 1. Flooring happens before the positivity check so a fraction such + * as 0.5 yields fallback instead of 0. + */ +function clampPositiveInt(value: number | undefined, fallback: number, max: number): number { + if (typeof value !== "number" || !Number.isFinite(value)) return fallback; + const whole = Math.floor(value); + // 0 < value < 1 floors to 0, which is not a usable day count or file limit. + if (whole < 1) return fallback; + return Math.min(max, whole); +} + +function sessionRoots(): Array<{ harness: Harness; root: string }> { + const home = homedir(); + const roots: Array<{ harness: Harness; root: string }> = [ + { harness: "codex", root: process.env.OPENSCOUT_TAIL_CODEX_SESSIONS_ROOT ?? join(home, ".codex", "sessions") }, + { harness: "codex", root: join(home, ".openai-codex", "sessions") }, + { harness: "claude", root: process.env.OPENSCOUT_TAIL_CLAUDE_PROJECTS_ROOT ?? 
join(home, ".claude", "projects") }, + ]; + return roots.filter((entry, index, entries) => + existsSync(entry.root) + && entries.findIndex((candidate) => candidate.harness === entry.harness && candidate.root === entry.root) === index + ); +} + +function discoverRecentSessionFiles(days: number, limit: number): SessionFile[] { + const cutoff = Date.now() - days * 24 * 60 * 60 * 1000; + const files: SessionFile[] = []; + for (const { harness, root } of sessionRoots()) { + const stack = [root]; + while (stack.length > 0) { + const dir = stack.pop()!; + let entries: string[] = []; + try { + entries = readdirSync(dir); + } catch { + continue; + } + for (const entry of entries) { + const path = join(dir, entry); + let stats; + try { + stats = statSync(path); + } catch { + continue; + } + if (stats.isDirectory()) { + stack.push(path); + continue; + } + if (!entry.endsWith(".jsonl") || stats.mtimeMs < cutoff) continue; + files.push({ harness, path, mtimeMs: stats.mtimeMs, size: stats.size }); + } + } + } + return files + .sort((left, right) => right.mtimeMs - left.mtimeMs) + .slice(0, limit); +} + +async function parseJsonl(file: SessionFile): Promise { + const records: NormalizedRecord[] = []; + const hash = createHash("sha256"); + let carry = ""; + let offset = 0; + let index = 0; + let cwd: string | null = null; + let sessionId: string | null = null; + + const handleLine = (rawLine: string) => { + const lineOffset = offset; + offset += Buffer.byteLength(rawLine, "utf8") + 1; + if (!rawLine.trim()) return; + try { + const value = JSON.parse(rawLine) as unknown; + if (!value || typeof value !== "object" || Array.isArray(value)) { + records.push({ + i: index++, + kind: "unknown", + sourceType: "non_object", + sourceOffset: lineOffset, + }); + return; + } + const record = normalizeRecord(value as Record, index, lineOffset, file.harness); + records.push(record); + cwd ??= inferCwd(record); + sessionId ??= inferSessionId(record, file); + index++; + } catch { + records.push({ + 
i: index++, + kind: "unknown", + sourceType: "unparseable", + sourceOffset: lineOffset, + }); + } + }; + + for await (const chunk of createReadStream(file.path, { encoding: "utf8" })) { + hash.update(chunk); + carry += chunk; + const lines = carry.split(/\r?\n/u); + carry = lines.pop() ?? ""; + for (const line of lines) handleLine(line); + } + if (carry.length > 0) handleLine(carry); + + return { + harness: file.harness, + records, + scannedLines: records.length, + bytesRead: offset, + contentHash: `sha256:${hash.digest("hex")}`, + cwd, + sessionId, + }; +} + +function normalizeRecord( + obj: Record, + i: number, + sourceOffset: number, + harness: Harness, +): NormalizedRecord { + return harness === "codex" + ? normalizeCodex(obj, i, sourceOffset) + : normalizeClaude(obj, i, sourceOffset); +} + +function normalizeCodex( + obj: Record, + i: number, + sourceOffset: number, +): NormalizedRecord { + const type = String(obj.type ?? ""); + const ts = typeof obj.timestamp === "string" ? obj.timestamp : undefined; + const payload = recordValue(obj.payload) ?? 
{}; + const base = { i, ts, sourceType: type, sourceOffset }; + + if (type === "session_meta") { + return { + ...base, + kind: "session_meta", + tag: "meta", + meta: payload, + refs: { sessionId: stringValue(payload.id) }, + }; + } + if (type === "turn_context") { + return { ...base, kind: "system_record", tag: "turn_context", meta: payload }; + } + if (type === "response_item") return normalizeCodexInner(payload, base); + if (type === "event_msg") return normalizeCodexEvent(payload, base); + if (type === "message") return normalizeCodexMessage(payload, base); + if (type === "function_call" || type === "local_shell_call") return normalizeCodexTool(payload, base); + if (type === "function_call_output" || type === "local_shell_call_output") return normalizeCodexResult(payload, base); + if (type === "reasoning") return normalizeCodexReasoning(payload, base); + return { ...base, kind: "system_record", tag: type || "record", text: compactJson(payload) }; +} + +type CodexBase = { i: number; ts?: string; sourceType: string; sourceOffset: number }; + +function normalizeCodexInner(payload: Record, base: CodexBase): NormalizedRecord { + const type = String(payload.type ?? ""); + if (type === "message") return normalizeCodexMessage(payload, base); + if (type === "reasoning") return normalizeCodexReasoning(payload, base); + if (type === "function_call" || type === "local_shell_call") return normalizeCodexTool(payload, base); + if (type === "function_call_output" || type === "local_shell_call_output") return normalizeCodexResult(payload, base); + return { ...base, kind: "system_record", tag: type || "response_item", meta: payload }; +} + +function normalizeCodexEvent(payload: Record, base: CodexBase): NormalizedRecord { + const type = String(payload.type ?? ""); + if (type === "user_message") { + return { ...base, kind: "user_turn", tag: "user", text: String(payload.message ?? payload.text ?? 
"") }; + } + if (type === "agent_message") { + return { ...base, kind: "assistant_turn", tag: "assistant", text: String(payload.message ?? payload.text ?? "") }; + } + return { ...base, kind: "system_record", tag: type || "event_msg", meta: payload }; +} + +function normalizeCodexMessage(payload: Record, base: CodexBase): NormalizedRecord { + const role = String(payload.role ?? ""); + const text = extractText(payload.content); + if (role === "user") return { ...base, kind: "user_turn", tag: "user", text }; + if (role === "assistant") return { ...base, kind: "assistant_turn", tag: "assistant", text }; + return { ...base, kind: "system_record", tag: role || "message", text }; +} + +function normalizeCodexTool(payload: Record, base: CodexBase): NormalizedRecord { + const name = String(payload.name ?? payload.command ?? "tool"); + const input = payload.arguments ?? payload.args ?? payload.input ?? {}; + return { ...base, kind: "command_or_tool", tag: name, tool: { name, input } }; +} + +function normalizeCodexResult(payload: Record, base: CodexBase): NormalizedRecord { + return { ...base, kind: "observation", tag: "result", result: { output: payload.output ?? payload.content ?? "" } }; +} + +function normalizeCodexReasoning(payload: Record, base: CodexBase): NormalizedRecord { + let text = ""; + if (Array.isArray(payload.summary)) { + text = payload.summary + .map((entry) => recordValue(entry)?.text) + .filter((entry): entry is string => typeof entry === "string") + .join(" "); + } + return { ...base, kind: "assistant_turn", tag: "reasoning", text: text || stringValue(payload.content) || "" }; +} + +function normalizeClaude( + obj: Record, + i: number, + sourceOffset: number, +): NormalizedRecord { + const type = String(obj.type ?? ""); + const ts = typeof obj.timestamp === "string" ? obj.timestamp : undefined; + const refs = { + id: stringValue(obj.uuid), + parentId: stringValue(obj.parentUuid), + sessionId: stringValue(obj.sessionId) ?? 
stringValue(obj.session_id), + }; + const base = { i, ts, sourceType: type, sourceOffset, refs }; + + if (type === "user") { + const message = recordValue(obj.message); + return { ...base, kind: "user_turn", tag: "user", text: extractText(message?.content ?? obj.content) }; + } + if (type === "assistant") { + const message = recordValue(obj.message); + const content = message?.content ?? obj.content; + const tool = Array.isArray(content) + ? content.map(recordValue).find((entry) => entry?.type === "tool_use") + : null; + if (tool) { + const name = String(tool.name ?? "tool"); + return { + ...base, + kind: "command_or_tool", + tag: name, + sourceType: "tool_use", + tool: { name, input: tool.input ?? {} }, + }; + } + return { ...base, kind: "assistant_turn", tag: "assistant", text: extractText(content) }; + } + if (type === "tool_use") { + const name = String(obj.name ?? "tool"); + return { ...base, kind: "command_or_tool", tag: name, tool: { name, input: obj.input ?? {} } }; + } + if (type === "tool_result") { + return { ...base, kind: "observation", tag: "result", result: { output: obj.content ?? "" } }; + } + if (type === "system") { + return { ...base, kind: "system_record", tag: "system", text: extractText(obj.content) }; + } + return { ...base, kind: "system_record", tag: type || "record", meta: obj }; +} + +function inferCwd(record: NormalizedRecord): string | null { + const meta = record.meta; + const cwd = stringValue(meta?.cwd); + return cwd && cwd.trim() ? cwd : null; +} + +function inferSessionId(record: NormalizedRecord, file: SessionFile): string | null { + return record.refs?.sessionId + ?? stringValue(record.meta?.id) + ?? stringValue(record.meta?.sessionId) + ?? basename(file.path).replace(/\.jsonl$/u, ""); +} + +function recordValue(value: unknown): Record | null { + return value && typeof value === "object" && !Array.isArray(value) + ? 
value as Record + : null; +} + +/** Return value only when it is a string containing non-whitespace characters. */ +function stringValue(value: unknown): string | undefined { + return typeof value === "string" && value.trim() ? value : undefined; +} + +/** + * Flatten message content to plain text: a string is returned unchanged, an + * array contributes its string entries plus each object's string text (or, + * failing that, string content) joined by single spaces, anything else is "". + */ +function extractText(content: unknown): string { + if (typeof content === "string") return content; + if (!Array.isArray(content)) return ""; + return content + .map((entry) => { + if (typeof entry === "string") return entry; + const block = recordValue(entry); + if (!block) return ""; + if (typeof block.text === "string") return block.text; + if (typeof block.content === "string") return block.content; + return ""; + }) + .filter(Boolean) + .join(" "); +} + +/** + * Serialize value as compact JSON, returning "" when it cannot be serialized. + * + * JSON.stringify throws for BigInt values and circular structures, and returns + * undefined (not a string) for undefined, functions and symbols. Both cases are + * mapped to "" so callers can always treat the result as a string. + */ +function compactJson(value: unknown): string { + try { + const json: string | undefined = JSON.stringify(value); + return json ?? ""; + } catch { + return ""; + } +} + +/** Collapse whitespace runs to single spaces, trim, and shorten text longer than max so it ends in "...". */ +function trimOneLine(value: string, max: number): string { + const flat = value.replace(/\s+/g, " ").trim(); + return flat.length <= max ? flat : `${flat.slice(0, Math.max(0, max - 3))}...`; +} + +/** SHA-256 of value, formatted as "sha256:" followed by the hex digest. */ +function hashText(value: string): string { + return `sha256:${createHash("sha256").update(value).digest("hex")}`; +} + +/** First length hex characters of the SHA-256 of value. */ +function stableId(value: string, length = 16): string { + return createHash("sha256").update(value).digest("hex").slice(0, length); +} + +/** + * Express filePath relative to a known root: first the knowledge root with its + * trailing "knowledge" segment removed (OPENSCOUT_CONTROL_HOME), then the home + * directory (HOME). Paths under neither are returned unchanged as ABSOLUTE. + */ +function portablePath(filePath: string): KnowledgePortablePath { + const home = homedir(); + const paths = resolveOpenScoutKnowledgePaths(); + const roots: Array<{ root: KnowledgePortablePath["root"]; path: string }> = [ + { root: "OPENSCOUT_CONTROL_HOME", path: paths.knowledgeRoot.replace(new RegExp(`${sep}knowledge$`), "") }, + { root: "HOME", path: home }, + ]; + for (const root of roots) { + const rel = relative(root.path, filePath); + if (rel && !rel.startsWith("..") && !rel.startsWith(sep)) { + return { root: root.root, relPath: rel }; + } + } + return { root: "ABSOLUTE", relPath: filePath }; +} + +function sourceRefFor(file: SessionFile, parse: ParseResult, range?: [number, number]): KnowledgeSourceRef { + return { + kind: "harness_transcript", + harness: file.harness, + 
path: portablePath(file.path), + sessionId: parse.sessionId ?? undefined, + recordRange: range, + anchor: { + sizeBytes: file.size, + mtimeMs: file.mtimeMs, + contentHash: parse.contentHash, + }, + }; +} + +function sourceRefWithRecordRange(ref: KnowledgeSourceRef, range: [number, number]): KnowledgeSourceRef { + return ref.kind === "harness_transcript" ? { ...ref, recordRange: range } : ref; +} + +function projectName(cwd: string | null, filePath: string): string { + if (cwd) return basename(cwd); + const claudeProjectMatch = /\/\.claude\/projects\/([^/]+)/u.exec(filePath); + if (claudeProjectMatch?.[1]) return claudeProjectMatch[1].replace(/^-/, "").replace(/-/g, "/").split("/").pop() || "claude"; + return basename(filePath).replace(/\.jsonl$/u, ""); +} + +function titleFor(file: SessionFile, parse: ParseResult, project: string): string { + const firstUser = parse.records.find((record) => record.kind === "user_turn" && record.text?.trim()); + const date = new Date(file.mtimeMs).toLocaleString("en-US", { + month: "short", + day: "numeric", + hour: "numeric", + minute: "2-digit", + }); + const goal = firstUser?.text ? ` - ${trimOneLine(firstUser.text, 82)}` : ""; + return `${capitalize(file.harness)} ${project} ${date}${goal}`; +} + +function capitalize(value: string): string { + return value.slice(0, 1).toUpperCase() + value.slice(1); +} + +function extractPaths(input: unknown): string[] { + if (input == null) return []; + if (typeof input === "string") { + try { + return extractPaths(JSON.parse(input)); + } catch { + const matches = input.match(/(?:\.\/|\.\.\/|~\/|\/)[\w./~_\-+]+\.[\w]+/gu); + return matches ?? 
[]; + } + } + if (typeof input !== "object" || Array.isArray(input)) return []; + const obj = input as Record; + const paths = new Set(); + for (const key of ["path", "file_path", "filePath", "filename", "filenames"]) { + const value = obj[key]; + if (typeof value === "string") paths.add(value); + if (Array.isArray(value)) { + for (const entry of value) if (typeof entry === "string") paths.add(entry); + } + } + const command = obj.command ?? obj.cmd; + if (typeof command === "string") { + const matches = command.match(/(?:\.\/|\.\.\/|~\/|\/)[\w./~_\-+]+\.[\w]+/gu); + if (matches) for (const match of matches) paths.add(match); + } + return [...paths]; +} + +function oneLineInput(input: unknown): string { + const text = typeof input === "string" ? input : compactJson(input ?? {}); + return trimOneLine(text, 120); +} + +function summarizeRecord(record: NormalizedRecord): string { + if (record.text) return trimOneLine(record.text, 180); + if (record.tool) return `name=${record.tool.name} input=${oneLineInput(record.tool.input)}`; + if (record.result) { + const output = typeof record.result.output === "string" + ? record.result.output + : compactJson(record.result.output ?? ""); + return trimOneLine(output, 180); + } + if (record.meta) return trimOneLine(compactJson(record.meta), 180); + return ""; +} + +function buildOverview(parse: ParseResult, file: SessionFile, project: string, title: string): string { + const userTurns = parse.records.filter((record) => record.kind === "user_turn" && record.text?.trim()); + const assistantTurns = parse.records.filter((record) => record.kind === "assistant_turn" && record.text?.trim()); + const tools = parse.records.filter((record) => record.kind === "command_or_tool"); + const firstUser = userTurns[0]?.text ? trimOneLine(userTurns[0].text, 700) : "No user turn text detected."; + const latestAssistant = assistantTurns.at(-1)?.text ? 
trimOneLine(assistantTurns.at(-1)!.text!, 700) : "No assistant text detected."; + const modified = new Date(file.mtimeMs).toISOString(); + return [ + `# ${title}`, + "", + `Source: ${file.harness} transcript ${file.path}`, + `Project: ${project}`, + `Modified: ${modified}`, + "", + "## Session Frame", + "", + firstUser, + "", + "## Latest Assistant Context", + "", + latestAssistant, + "", + "## Mechanical Summary", + "", + `- Records: ${parse.records.length}`, + `- User turns: ${userTurns.length}`, + `- Assistant turns: ${assistantTurns.length}`, + `- Tool calls: ${tools.length}`, + `- Raw size: ${file.size} bytes`, + "", + ].join("\n"); +} + +function buildFiles(parse: ParseResult): string { + const counts = new Map(); + const tools = new Map>(); + for (const record of parse.records) { + if (record.kind !== "command_or_tool" || !record.tool) continue; + for (const path of extractPaths(record.tool.input)) { + counts.set(path, (counts.get(path) ?? 0) + 1); + const names = tools.get(path) ?? new Set(); + names.add(record.tool.name); + tools.set(path, names); + } + } + const rows = [...counts.entries()].sort((left, right) => right[1] - left[1]); + const lines = [ + "# Files touched", + "", + `Distinct paths: ${rows.length}.`, + "", + "| path | hits | tools |", + "| --- | ---: | --- |", + ]; + for (const [path, hits] of rows) { + lines.push(`| \`${path}\` | ${hits} | ${[...(tools.get(path) ?? [])].sort().join(", ")} |`); + } + if (rows.length === 0) lines.push("| _no paths detected_ | 0 | |"); + return `${lines.join("\n")}\n`; +} + +function buildToolCalls(parse: ParseResult): string { + const calls = parse.records.filter((record) => record.kind === "command_or_tool" && record.tool); + const byName = new Map(); + for (const call of calls) byName.set(call.tool!.name, (byName.get(call.tool!.name) ?? 
0) + 1); + const lines = [ + "# Tool calls", + "", + `Total calls: ${calls.length}.`, + "", + "## By tool", + "", + "| tool | calls |", + "| --- | ---: |", + ]; + for (const [name, count] of [...byName.entries()].sort((left, right) => right[1] - left[1])) { + lines.push(`| \`${name}\` | ${count} |`); + } + lines.push("", "## Sample", ""); + for (const call of calls.slice(0, 80)) { + lines.push(`- [${String(call.i).padStart(4, "0")}] \`${call.tool!.name}\` ${oneLineInput(call.tool!.input)}`); + } + return `${lines.join("\n")}\n`; +} + +function buildEventDocuments(parse: ParseResult, file: SessionFile): ExtractedDocument[] { + const docs: ExtractedDocument[] = []; + for (let start = 0; start < parse.records.length; start += EVENT_WINDOW_RECORDS) { + const slice = parse.records.slice(start, start + EVENT_WINDOW_RECORDS); + const index = String(Math.floor(start / EVENT_WINDOW_RECORDS) + 1).padStart(3, "0"); + const first = slice[0]?.i ?? start; + const last = slice.at(-1)?.i ?? first; + const sourceRef = sourceRefFor(file, parse, [first, last]); + const lines = [ + `# Events window ${index}`, + "", + `Source: ${file.path}`, + `Records: ${first}..${last}`, + "", + ]; + for (const record of slice) { + lines.push(`- [${String(record.i).padStart(4, "0")}] \`${record.kind}\` (${record.tag ?? record.sourceType}) - ${summarizeRecord(record)}`); + } + docs.push({ + path: `events-${index}.md`, + kind: "events", + content: `${lines.join("\n")}\n`, + sourceRef, + }); + } + return docs; +} + +function buildDocuments(parse: ParseResult, file: SessionFile, project: string, title: string): ExtractedDocument[] { + const allSourceRef = sourceRefFor(file, parse, parse.records.length > 0 ? 
[0, parse.records.at(-1)!.i] : undefined); + return [ + { path: "overview.md", kind: "overview", content: buildOverview(parse, file, project, title), sourceRef: allSourceRef }, + { path: "files.md", kind: "files", content: buildFiles(parse), sourceRef: allSourceRef }, + { path: "tool-calls.md", kind: "tool-calls", content: buildToolCalls(parse), sourceRef: allSourceRef }, + ...buildEventDocuments(parse, file), + ]; +} + +function chunkDocument(document: ExtractedDocument): Array<{ text: string; sourceRef: KnowledgeSourceRef }> { + if (document.kind !== "events") { + return splitMarkdownSections(document.content).map((text) => ({ text, sourceRef: document.sourceRef })); + } + const lines = document.content.split("\n"); + const chunks: Array<{ text: string; sourceRef: KnowledgeSourceRef }> = []; + let header: string[] = []; + let current: { first: number; last: number; records: number; lines: string[] } | null = null; + const flush = () => { + if (!current) return; + const text = current.lines.join("\n").trim(); + if (text) { + chunks.push({ + text, + sourceRef: sourceRefWithRecordRange(document.sourceRef, [current.first, current.last]), + }); + } + current = null; + }; + for (const line of lines) { + const match = /^- \[(\d+)\]/u.exec(line); + if (!match) { + if (current) current.lines.push(line); + else if (line.trim()) header.push(line); + continue; + } + const record = Number(match[1]); + if (!current || current.records >= EVENT_CHUNK_RECORDS) { + flush(); + current = { + first: record, + last: record, + records: 1, + lines: header.length > 0 ? [...header, "", line] : [line], + }; + header = []; + } else { + current.last = record; + current.records++; + current.lines.push(line); + } + } + flush(); + return chunks.length > 0 ? 
chunks : [{ text: document.content, sourceRef: document.sourceRef }]; +} + +function splitMarkdownSections(content: string): string[] { + const lines = content.split("\n"); + const chunks: string[] = []; + let current: string[] = []; + const flush = () => { + const text = current.join("\n").trim(); + if (text) chunks.push(text); + current = []; + }; + for (const line of lines) { + if (line.startsWith("## ") && current.length > 0) flush(); + current.push(line); + } + flush(); + return chunks; +} + +function writeQmdCollection( + collection: KnowledgeCollection, + documents: ExtractedDocument[], + parse: ParseResult, + file: SessionFile, +): void { + const outDir = collection.qmdPath; + const tmpDir = `${outDir}.tmp-${process.pid}`; + rmSync(tmpDir, { recursive: true, force: true }); + mkdirSync(tmpDir, { recursive: true }); + + const manifest = { + schema: "openscout.knowledge.collection/v1", + collectionId: collection.id, + kind: collection.kind, + title: collection.title, + generator: { + extractorVersion: collection.extractorVersion, + generatedAt: new Date(collection.updatedAt).toISOString(), + }, + source: { + kind: "harness_transcript", + harness: file.harness, + ref: portablePath(file.path), + sessionId: parse.sessionId, + sizeBytes: file.size, + mtimeMs: file.mtimeMs, + contentHash: parse.contentHash, + recordsScanned: parse.records.length, + }, + chunking: { + events: { + strategy: "record-window", + window: EVENT_WINDOW_RECORDS, + chunkRecords: EVENT_CHUNK_RECORDS, + version: CHUNK_POLICY_VERSION, + }, + }, + documents: documents.map((document) => ({ + path: document.path, + kind: document.kind, + origin: "mechanical", + bytes: Buffer.byteLength(document.content, "utf8"), + contentHash: hashText(document.content), + })), + facets: collection.facets, + ownership: "derived", + contentHash: collection.contentHash, + status: collection.status, + }; + + writeFileSync(join(tmpDir, "manifest.json"), `${JSON.stringify(manifest, null, 2)}\n`, "utf8"); + for (const 
document of documents) { + writeFileSync(join(tmpDir, document.path), document.content, "utf8"); + } + + rmSync(outDir, { recursive: true, force: true }); + mkdirSync(join(outDir, ".."), { recursive: true }); + renameSync(tmpDir, outDir); +} + +function collectionContentHash(file: SessionFile, parse: ParseResult): string { + return hashText([ + EXTRACTOR_VERSION, + CHUNK_POLICY_VERSION, + file.harness, + file.path, + file.mtimeMs, + file.size, + parse.contentHash, + ].join("\0")); +} + +function collectionIdFor(file: SessionFile, parse: ParseResult): string { + const sessionPart = parse.sessionId + ? parse.sessionId.replace(/[^A-Za-z0-9_.-]+/gu, "-").slice(0, 80) + : stableId(file.path); + return `sessions/${file.harness}/${sessionPart}-${stableId(file.path, 10)}`; +} + +function documentId(collectionId: string, path: string): string { + return hashText(`${collectionId}\0${path}`); +} + +function storeSessionCollection( + store: SQLiteKnowledgeStore, + file: SessionFile, + parse: ParseResult, + force: boolean, +): IndexedSessionKnowledgeSummary { + const project = projectName(parse.cwd, file.path); + const id = collectionIdFor(file, parse); + const qmdPath = knowledgeCollectionQmdPath(id); + const title = titleFor(file, parse, project); + const sourceRef = sourceRefFor(file, parse, parse.records.length > 0 ? [0, parse.records.at(-1)!.i] : undefined); + const facets: KnowledgeFacets = { + harness: file.harness, + project, + source: "sessions", + transcriptPath: file.path, + sessionId: parse.sessionId ?? 
"", + }; + const now = Date.now(); + const collection: KnowledgeCollection = { + id, + kind: "sessions", + title, + sourceRefs: [sourceRef], + qmdPath, + status: "ready", + contentHash: collectionContentHash(file, parse), + extractorVersion: EXTRACTOR_VERSION, + chunkPolicyVersion: CHUNK_POLICY_VERSION, + createdAt: now, + updatedAt: now, + facets, + }; + const existing = store.getCollection(id); + if (!force && existing?.contentHash === collection.contentHash) { + return { + collectionId: id, + title, + harness: file.harness, + project, + transcriptPath: file.path, + qmdPath, + records: parse.records.length, + documents: 0, + chunks: 0, + bytes: file.size, + mtimeMs: file.mtimeMs, + skipped: true, + }; + } + + const documents = buildDocuments(parse, file, project, title); + writeQmdCollection(collection, documents, parse, file); + + store.deleteCollection(id); + store.upsertCollection(collection); + + let chunks = 0; + for (const extracted of documents) { + const doc: KnowledgeDocument = { + id: documentId(id, extracted.path), + collectionId: id, + path: extracted.path, + kind: extracted.kind, + origin: "mechanical", + contentHash: hashText(extracted.content), + }; + store.upsertDocument(doc); + chunkDocument(extracted).forEach((chunk, ordinal) => { + const knowledgeChunk: KnowledgeChunk = { + id: deterministicKnowledgeChunkId({ + collectionId: id, + documentPath: extracted.path, + ordinal, + chunkPolicyVersion: CHUNK_POLICY_VERSION, + text: chunk.text, + }), + collectionId: id, + documentId: doc.id, + documentPath: extracted.path, + ordinal, + text: chunk.text, + textHash: hashText(chunk.text), + origin: "mechanical", + ownership: "derived", + sourceRefs: [chunk.sourceRef], + facets, + }; + store.upsertChunk(knowledgeChunk, `${title} / ${extracted.path}`); + chunks++; + }); + } + + return { + collectionId: id, + title, + harness: file.harness, + project, + transcriptPath: file.path, + qmdPath, + records: parse.records.length, + documents: documents.length, + 
chunks, + bytes: file.size, + mtimeMs: file.mtimeMs, + }; +} + +/** + * Index recently modified harness transcripts into the knowledge store. + * + * days is capped at 30 (default DEFAULT_DAYS) and limit at 1000 (default + * DEFAULT_LIMIT). A failure while parsing or storing one transcript is recorded + * as an entry with an error message in sessions and does not stop the run; a + * failure outside the per-file handler marks the job "failed". + * + * The store is opened here and is always closed, including when creating the + * job or discovering files throws. + */ +export async function indexRecentSessionKnowledge( + input: IndexRecentSessionKnowledgeInput = {}, +): Promise { + const days = clampPositiveInt(input.days, DEFAULT_DAYS, 30); + const limit = clampPositiveInt(input.limit, DEFAULT_LIMIT, 1000); + const store = new SQLiteKnowledgeStore(); + // Everything after opening the store runs under this try/finally so the + // SQLite handle cannot leak when job creation or discovery throws. + try { + const job = store.createIndexJob({ source: "sessions", days, force: input.force, mode: "foreground" }); + const leaseGeneration = job.leaseGeneration + 1; + const files = discoverRecentSessionFiles(days, limit); + const sessions: IndexedSessionKnowledgeSummary[] = []; + let indexed = 0; + let failed = 0; + + try { + store.updateIndexJob({ + id: job.id, + state: "running", + leaseOwner: "session-indexer", + leaseGeneration, + progress: { discovered: files.length, extracted: 0, indexed: 0, failed: 0 }, + }); + for (const file of files) { + try { + const parse = await parseJsonl(file); + const summary = storeSessionCollection(store, file, parse, input.force === true); + sessions.push(summary); + indexed++; + } catch (error) { + failed++; + sessions.push({ + collectionId: `failed/${file.harness}/${stableId(file.path)}`, + title: basename(file.path), + harness: file.harness, + project: projectName(null, file.path), + transcriptPath: file.path, + qmdPath: "", + records: 0, + documents: 0, + chunks: 0, + bytes: file.size, + mtimeMs: file.mtimeMs, + error: error instanceof Error ? error.message : String(error), + }); + } + store.updateIndexJob({ + id: job.id, + state: "running", + leaseOwner: "session-indexer", + leaseGeneration, + progress: { discovered: files.length, extracted: indexed + failed, indexed, failed }, + }); + } + const completed = store.updateIndexJob({ + id: job.id, + state: "completed", + completedAt: Date.now(), + progress: { discovered: files.length, extracted: indexed + failed, indexed, failed }, + }) ?? 
job; + return { job: completed, days, discovered: files.length, indexed, failed, sessions }; + } catch (error) { + const failedJob = store.updateIndexJob({ + id: job.id, + state: "failed", + completedAt: Date.now(), + error: error instanceof Error ? error.message : String(error), + progress: { discovered: files.length, extracted: indexed + failed, indexed, failed }, + }) ?? job; + return { job: failedJob, days, discovered: files.length, indexed, failed: failed + 1, sessions }; + } + } finally { + store.close(); + } +} diff --git a/packages/runtime/src/knowledge/store.ts b/packages/runtime/src/knowledge/store.ts new file mode 100644 index 00000000..73013175 --- /dev/null +++ b/packages/runtime/src/knowledge/store.ts @@ -0,0 +1,647 @@ +import { mkdirSync, statSync } from "node:fs"; +import { createHash, randomUUID } from "node:crypto"; +import { dirname } from "node:path"; + +import { Database } from "bun:sqlite"; + +import { resolveOpenScoutKnowledgePaths, type OpenScoutKnowledgePaths } from "./paths.js"; +import type { + KnowledgeChunk, + KnowledgeCollection, + KnowledgeDocument, + KnowledgeDrilldown, + KnowledgeFacets, + KnowledgeIndexJob, + KnowledgeIndexJobState, + KnowledgeIndexRequest, + KnowledgeSearchHit, + KnowledgeSearchQuery, + KnowledgeSourceRef, + KnowledgeStatus, +} from "./types.js"; + +type SQLiteBinding = string | number | bigint | boolean | null | Uint8Array; + +type SQLiteTransactionalDatabase = Database & { + transaction( + callback: (...args: TArgs) => TResult + ): (...args: TArgs) => TResult; +}; + +type CollectionRow = { + id: string; + kind: KnowledgeCollection["kind"]; + title: string; + source_refs_json: string; + qmd_path: string; + status: KnowledgeCollection["status"]; + content_hash: string; + extractor_version: string; + chunk_policy_version: string; + created_at: number; + updated_at: number; + facets_json: string; +}; + +type ChunkRow = { + id: string; + collection_id: string; + document_id: string; + document_path: string; + ordinal: 
number; + text: string; + text_hash: string; + origin: KnowledgeChunk["origin"]; + ownership: KnowledgeChunk["ownership"]; + source_refs_json: string; + facets_json: string; + title?: string; + rank?: number; +}; + +type JobRow = { + id: string; + source: KnowledgeIndexJob["source"]; + state: KnowledgeIndexJobState; + lease_owner: string | null; + lease_generation: number; + progress_json: string; + created_at: number; + updated_at: number; + completed_at: number | null; + error: string | null; +}; + +const KNOWLEDGE_SQLITE_SCHEMA = ` +PRAGMA journal_mode = WAL; +PRAGMA foreign_keys = ON; + +CREATE TABLE IF NOT EXISTS collections ( + id TEXT PRIMARY KEY, + kind TEXT NOT NULL, + title TEXT NOT NULL, + source_refs_json TEXT NOT NULL, + qmd_path TEXT NOT NULL, + status TEXT NOT NULL, + content_hash TEXT NOT NULL, + extractor_version TEXT NOT NULL, + chunk_policy_version TEXT NOT NULL, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + facets_json TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS documents ( + id TEXT PRIMARY KEY, + collection_id TEXT NOT NULL REFERENCES collections(id) ON DELETE CASCADE, + path TEXT NOT NULL, + kind TEXT NOT NULL, + origin TEXT NOT NULL, + content_hash TEXT NOT NULL, + metadata_json TEXT, + UNIQUE(collection_id, path) +); + +CREATE TABLE IF NOT EXISTS chunks ( + id TEXT PRIMARY KEY, + collection_id TEXT NOT NULL REFERENCES collections(id) ON DELETE CASCADE, + document_id TEXT NOT NULL REFERENCES documents(id) ON DELETE CASCADE, + document_path TEXT NOT NULL, + ordinal INTEGER NOT NULL, + text TEXT NOT NULL, + text_hash TEXT NOT NULL, + origin TEXT NOT NULL, + ownership TEXT NOT NULL, + source_refs_json TEXT NOT NULL, + facets_json TEXT NOT NULL, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + UNIQUE(collection_id, document_path, ordinal) +); + +CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5( + chunk_id UNINDEXED, + collection_id UNINDEXED, + document_id UNINDEXED, + title, + body, + tokenize = 
"unicode61 tokenchars '-_./'" +); + +CREATE TABLE IF NOT EXISTS facets ( + collection_id TEXT NOT NULL REFERENCES collections(id) ON DELETE CASCADE, + chunk_id TEXT REFERENCES chunks(id) ON DELETE CASCADE, + key TEXT NOT NULL, + value TEXT NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_facets_key_value ON facets(key, value); +CREATE INDEX IF NOT EXISTS idx_facets_collection_key ON facets(collection_id, key); + +CREATE TABLE IF NOT EXISTS source_refs ( + id TEXT PRIMARY KEY, + collection_id TEXT NOT NULL REFERENCES collections(id) ON DELETE CASCADE, + chunk_id TEXT REFERENCES chunks(id) ON DELETE CASCADE, + kind TEXT NOT NULL, + ref_json TEXT NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_source_refs_kind ON source_refs(kind); +CREATE INDEX IF NOT EXISTS idx_source_refs_collection ON source_refs(collection_id); + +CREATE TABLE IF NOT EXISTS index_jobs ( + id TEXT PRIMARY KEY, + source TEXT NOT NULL, + state TEXT NOT NULL, + lease_owner TEXT, + lease_generation INTEGER NOT NULL DEFAULT 0, + progress_json TEXT NOT NULL, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + completed_at INTEGER, + error TEXT +); + +CREATE INDEX IF NOT EXISTS idx_index_jobs_state_updated ON index_jobs(state, updated_at DESC); +`; + +function stringify(value: unknown): string { + return JSON.stringify(value ?? 
null); +} + +function parseJson(value: string | null | undefined, fallback: T): T { + if (!value) return fallback; + try { + return JSON.parse(value) as T; + } catch { + return fallback; + } +} + +function nowMs(): number { + return Date.now(); +} + +function normalizedLimit(value: number | undefined, fallback = 20, max = 100): number { + if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) { + return fallback; + } + return Math.min(max, Math.floor(value)); +} + +function normalizeFtsQuery(value: string): string { + const terms = value + .split(/[^A-Za-z0-9_./-]+/u) + .map((term) => term.trim()) + .filter((term) => term.length > 0) + .slice(0, 12); + return terms.map((term) => `"${term.replace(/"/g, "\"\"")}"`).join(" "); +} + +function textHash(value: string): string { + return `sha256:${createHash("sha256").update(value).digest("hex")}`; +} + +export function deterministicKnowledgeChunkId(input: { + collectionId: string; + documentPath: string; + ordinal: number; + chunkPolicyVersion: string; + text: string; +}): string { + return textHash([ + input.collectionId, + input.documentPath, + String(input.ordinal), + input.chunkPolicyVersion, + textHash(input.text), + ].join("\0")); +} + +function collectionFromRow(row: CollectionRow): KnowledgeCollection { + return { + id: row.id, + kind: row.kind, + title: row.title, + sourceRefs: parseJson(row.source_refs_json, []), + qmdPath: row.qmd_path, + status: row.status, + contentHash: row.content_hash, + extractorVersion: row.extractor_version, + chunkPolicyVersion: row.chunk_policy_version, + createdAt: row.created_at, + updatedAt: row.updated_at, + facets: parseJson(row.facets_json, {}), + }; +} + +function chunkFromRow(row: ChunkRow): KnowledgeChunk { + return { + id: row.id, + collectionId: row.collection_id, + documentId: row.document_id, + documentPath: row.document_path, + ordinal: row.ordinal, + text: row.text, + textHash: row.text_hash, + origin: row.origin, + ownership: row.ownership, + 
sourceRefs: parseJson(row.source_refs_json, []), + facets: parseJson(row.facets_json, {}), + }; +} + +function jobFromRow(row: JobRow): KnowledgeIndexJob { + return { + id: row.id, + source: row.source, + state: row.state, + leaseOwner: row.lease_owner ?? undefined, + leaseGeneration: row.lease_generation, + progress: parseJson(row.progress_json, {}), + createdAt: row.created_at, + updatedAt: row.updated_at, + completedAt: row.completed_at ?? undefined, + error: row.error ?? undefined, + }; +} + +function drilldownsForChunk(chunk: KnowledgeChunk): KnowledgeDrilldown[] { + const drilldowns: KnowledgeDrilldown[] = [ + { + kind: "qmd", + collectionId: chunk.collectionId, + documentPath: chunk.documentPath, + chunkId: chunk.id, + }, + ]; + for (const sourceRef of chunk.sourceRefs) { + if (sourceRef.kind === "harness_transcript") { + drilldowns.push({ kind: "harness_transcript", sourceRef }); + } else if (sourceRef.kind === "file" || sourceRef.kind === "skill" || sourceRef.kind === "context_pack") { + drilldowns.push({ kind: "file", sourceRef }); + } else if (sourceRef.kind === "scout_record") { + drilldowns.push({ kind: "scout_record", sourceRef }); + } else if (sourceRef.kind === "mcp_tool") { + drilldowns.push({ kind: "mcp_tool", sourceRef }); + } + } + return drilldowns; +} + +function snippet(text: string, query: string): string { + const compact = text.replace(/\s+/g, " ").trim(); + if (compact.length <= 220) return compact; + const needle = query.toLowerCase().split(/\s+/).find((part) => part.length > 2); + const index = needle ? compact.toLowerCase().indexOf(needle) : -1; + const start = Math.max(0, index >= 0 ? index - 70 : 0); + const end = Math.min(compact.length, start + 220); + return `${start > 0 ? "..." : ""}${compact.slice(start, end)}${end < compact.length ? "..." 
: ""}`; +} + +function searchHitFromRow(row: ChunkRow, query: string): KnowledgeSearchHit { + const chunk = chunkFromRow(row); + return { + id: `hit:${chunk.id}`, + collectionId: chunk.collectionId, + documentId: chunk.documentId, + chunkId: chunk.id, + title: row.title ?? chunk.documentPath, + snippet: snippet(chunk.text, query), + score: typeof row.rank === "number" ? row.rank : 0, + scoreSource: "fts", + origin: chunk.origin, + ownership: chunk.ownership, + freshness: "unknown", + sourceRefs: chunk.sourceRefs, + drilldown: drilldownsForChunk(chunk), + facets: chunk.facets, + }; +} + +function insertFacetRows(db: Database, collectionId: string, chunkId: string | null, facets: KnowledgeFacets): void { + const statement = db.query( + `INSERT INTO facets (collection_id, chunk_id, key, value) VALUES (?1, ?2, ?3, ?4)`, + ); + for (const [key, rawValue] of Object.entries(facets)) { + const values = Array.isArray(rawValue) ? rawValue : [rawValue]; + for (const value of values) { + statement.run(collectionId, chunkId, key, value); + } + } +} + +function insertSourceRefs(db: Database, collectionId: string, chunkId: string | null, refs: KnowledgeSourceRef[]): void { + const statement = db.query( + `INSERT INTO source_refs (id, collection_id, chunk_id, kind, ref_json) + VALUES (?1, ?2, ?3, ?4, ?5)`, + ); + refs.forEach((ref, index) => { + statement.run( + textHash(`${collectionId}\0${chunkId ?? "collection"}\0${index}\0${stringify(ref)}`), + collectionId, + chunkId, + ref.kind, + stringify(ref), + ); + }); +} + +export class SQLiteKnowledgeStore { + private readonly db: Database; + private readonly paths: OpenScoutKnowledgePaths; + + constructor(dbPath?: string, paths?: OpenScoutKnowledgePaths) { + const resolvedPaths = paths ?? resolveOpenScoutKnowledgePaths(); + const sqlitePath = dbPath ?? 
resolvedPaths.sqlitePath; + this.paths = { ...resolvedPaths, sqlitePath }; + mkdirSync(dirname(sqlitePath), { recursive: true }); + mkdirSync(this.paths.qmdRoot, { recursive: true }); + this.db = new Database(sqlitePath, { create: true }); + this.db.exec("PRAGMA busy_timeout = 5000;"); + this.db.exec("PRAGMA journal_mode = WAL;"); + this.db.exec("PRAGMA synchronous = NORMAL;"); + this.db.exec(KNOWLEDGE_SQLITE_SCHEMA); + } + + close(): void { + this.db.close(); + } + + upsertCollection(collection: KnowledgeCollection): void { + this.db.query( + `INSERT INTO collections ( + id, kind, title, source_refs_json, qmd_path, status, content_hash, + extractor_version, chunk_policy_version, created_at, updated_at, facets_json + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12) + ON CONFLICT(id) DO UPDATE SET + kind = excluded.kind, + title = excluded.title, + source_refs_json = excluded.source_refs_json, + qmd_path = excluded.qmd_path, + status = excluded.status, + content_hash = excluded.content_hash, + extractor_version = excluded.extractor_version, + chunk_policy_version = excluded.chunk_policy_version, + updated_at = excluded.updated_at, + facets_json = excluded.facets_json`, + ).run( + collection.id, + collection.kind, + collection.title, + stringify(collection.sourceRefs), + collection.qmdPath, + collection.status, + collection.contentHash, + collection.extractorVersion, + collection.chunkPolicyVersion, + collection.createdAt, + collection.updatedAt, + stringify(collection.facets), + ); + + this.db.query("DELETE FROM facets WHERE collection_id = ?1 AND chunk_id IS NULL").run(collection.id); + this.db.query("DELETE FROM source_refs WHERE collection_id = ?1 AND chunk_id IS NULL").run(collection.id); + insertFacetRows(this.db, collection.id, null, collection.facets); + insertSourceRefs(this.db, collection.id, null, collection.sourceRefs); + } + + getCollection(id: string): KnowledgeCollection | null { + const row = this.db.query("SELECT * FROM collections WHERE 
id = ?1").get(id) as CollectionRow | null; + return row ? collectionFromRow(row) : null; + } + + deleteCollection(id: string): void { + (this.db as SQLiteTransactionalDatabase).transaction(() => { + const chunkRows = this.db.query( + "SELECT id FROM chunks WHERE collection_id = ?1", + ).all(id) as Array<{ id: string }>; + const deleteFts = this.db.query("DELETE FROM chunks_fts WHERE chunk_id = ?1"); + for (const row of chunkRows) { + deleteFts.run(row.id); + } + this.db.query("DELETE FROM collections WHERE id = ?1").run(id); + })(); + } + + upsertDocument(document: KnowledgeDocument): void { + this.db.query( + `INSERT INTO documents (id, collection_id, path, kind, origin, content_hash, metadata_json) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7) + ON CONFLICT(id) DO UPDATE SET + collection_id = excluded.collection_id, + path = excluded.path, + kind = excluded.kind, + origin = excluded.origin, + content_hash = excluded.content_hash, + metadata_json = excluded.metadata_json`, + ).run( + document.id, + document.collectionId, + document.path, + document.kind, + document.origin, + document.contentHash, + stringify(document.metadata ?? 
null), + ); + } + + upsertChunk(chunk: KnowledgeChunk, title = chunk.documentPath): void { + const now = nowMs(); + (this.db as SQLiteTransactionalDatabase).transaction(() => { + this.db.query( + `INSERT INTO chunks ( + id, collection_id, document_id, document_path, ordinal, text, text_hash, + origin, ownership, source_refs_json, facets_json, created_at, updated_at + ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13) + ON CONFLICT(id) DO UPDATE SET + collection_id = excluded.collection_id, + document_id = excluded.document_id, + document_path = excluded.document_path, + ordinal = excluded.ordinal, + text = excluded.text, + text_hash = excluded.text_hash, + origin = excluded.origin, + ownership = excluded.ownership, + source_refs_json = excluded.source_refs_json, + facets_json = excluded.facets_json, + updated_at = excluded.updated_at`, + ).run( + chunk.id, + chunk.collectionId, + chunk.documentId, + chunk.documentPath, + chunk.ordinal, + chunk.text, + chunk.textHash, + chunk.origin, + chunk.ownership, + stringify(chunk.sourceRefs), + stringify(chunk.facets), + now, + now, + ); + + this.db.query("DELETE FROM chunks_fts WHERE chunk_id = ?1").run(chunk.id); + this.db.query( + `INSERT INTO chunks_fts (chunk_id, collection_id, document_id, title, body) + VALUES (?1, ?2, ?3, ?4, ?5)`, + ).run(chunk.id, chunk.collectionId, chunk.documentId, title, chunk.text); + + this.db.query("DELETE FROM facets WHERE chunk_id = ?1").run(chunk.id); + this.db.query("DELETE FROM source_refs WHERE chunk_id = ?1").run(chunk.id); + insertFacetRows(this.db, chunk.collectionId, chunk.id, chunk.facets); + insertSourceRefs(this.db, chunk.collectionId, chunk.id, chunk.sourceRefs); + })(); + } + + searchLexical(query: KnowledgeSearchQuery): KnowledgeSearchHit[] { + const q = query.q.trim(); + if (!q) return []; + const ftsQuery = normalizeFtsQuery(q); + if (!ftsQuery) return []; + const params: SQLiteBinding[] = [ftsQuery]; + const clauses = ["chunks_fts MATCH ?1"]; + + if 
(query.collections?.length) { + const placeholders = query.collections.map((collectionId) => { + params.push(collectionId); + return `?${params.length}`; + }).join(", "); + clauses.push(`c.collection_id IN (${placeholders})`); + } + + if (query.sourceKinds?.length) { + const placeholders = query.sourceKinds.map((kind) => { + params.push(kind); + return `?${params.length}`; + }).join(", "); + clauses.push(`col.kind IN (${placeholders})`); + } + + const sql = ` + SELECT + c.*, + col.title AS title, + bm25(chunks_fts) AS rank + FROM chunks_fts + JOIN chunks c ON c.id = chunks_fts.chunk_id + JOIN collections col ON col.id = c.collection_id + WHERE ${clauses.join(" AND ")} + ORDER BY rank ASC + LIMIT ?${params.length + 1}`; + params.push(normalizedLimit(query.limit)); + + try { + const rows = this.db.query(sql).all(...params) as ChunkRow[]; + return rows.map((row) => searchHitFromRow(row, q)); + } catch { + return []; + } + } + + createIndexJob(request: KnowledgeIndexRequest, id = `knowledge-job-${randomUUID()}`): KnowledgeIndexJob { + const now = nowMs(); + const job: KnowledgeIndexJob = { + id, + source: request.source, + state: "queued", + leaseGeneration: 0, + progress: {}, + createdAt: now, + updatedAt: now, + }; + this.db.query( + `INSERT INTO index_jobs (id, source, state, lease_generation, progress_json, created_at, updated_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)`, + ).run(job.id, job.source, job.state, job.leaseGeneration, stringify(job.progress), job.createdAt, job.updatedAt); + return job; + } + + updateIndexJob(input: { + id: string; + state?: KnowledgeIndexJobState; + leaseOwner?: string | null; + leaseGeneration?: number; + progress?: KnowledgeIndexJob["progress"]; + error?: string | null; + completedAt?: number | null; + }): KnowledgeIndexJob | null { + const existing = this.getIndexJob(input.id); + if (!existing) return null; + const next: KnowledgeIndexJob = { + ...existing, + state: input.state ?? 
existing.state, + leaseOwner: input.leaseOwner === null ? undefined : input.leaseOwner ?? existing.leaseOwner, + leaseGeneration: input.leaseGeneration ?? existing.leaseGeneration, + progress: input.progress ?? existing.progress, + updatedAt: nowMs(), + completedAt: input.completedAt === null ? undefined : input.completedAt ?? existing.completedAt, + error: input.error === null ? undefined : input.error ?? existing.error, + }; + this.db.query( + `UPDATE index_jobs + SET state = ?2, + lease_owner = ?3, + lease_generation = ?4, + progress_json = ?5, + updated_at = ?6, + completed_at = ?7, + error = ?8 + WHERE id = ?1`, + ).run( + next.id, + next.state, + next.leaseOwner ?? null, + next.leaseGeneration, + stringify(next.progress), + next.updatedAt, + next.completedAt ?? null, + next.error ?? null, + ); + return next; + } + + getIndexJob(id: string): KnowledgeIndexJob | null { + const row = this.db.query("SELECT * FROM index_jobs WHERE id = ?1").get(id) as JobRow | null; + return row ? jobFromRow(row) : null; + } + + listActiveJobs(): KnowledgeIndexJob[] { + const rows = this.db.query( + `SELECT * FROM index_jobs + WHERE state IN ('queued', 'running', 'waiting') + ORDER BY updated_at DESC + LIMIT 50`, + ).all() as JobRow[]; + return rows.map(jobFromRow); + } + + status(): KnowledgeStatus { + const collectionCounts = this.db.query( + `SELECT COUNT(*) AS total, + SUM(CASE WHEN status = 'ready' THEN 1 ELSE 0 END) AS ready + FROM collections`, + ).get() as { total: number; ready: number | null } | null; + const chunkCounts = this.db.query("SELECT COUNT(*) AS total FROM chunks").get() as { total: number } | null; + let sqliteBytes = 0; + try { + sqliteBytes = statSync(this.paths.sqlitePath).size; + } catch { + sqliteBytes = 0; + } + return { + generatedAt: nowMs(), + paths: { + knowledgeRoot: this.paths.knowledgeRoot, + qmdRoot: this.paths.qmdRoot, + sqlitePath: this.paths.sqlitePath, + }, + collections: collectionCounts?.total ?? 
0, + readyCollections: collectionCounts?.ready ?? 0, + chunks: chunkCounts?.total ?? 0, + activeJobs: this.listActiveJobs(), + sqliteBytes, + }; + } +} diff --git a/packages/runtime/src/knowledge/types.ts b/packages/runtime/src/knowledge/types.ts new file mode 100644 index 00000000..7021e9c8 --- /dev/null +++ b/packages/runtime/src/knowledge/types.ts @@ -0,0 +1,183 @@ +export type KnowledgeCollectionKind = + | "sessions" + | "skills" + | "mcp" + | "codebase" + | "context_pack" + | "mixed"; + +export type KnowledgeCollectionStatus = "building" | "ready" | "failed"; +export type KnowledgeDocumentOrigin = "mechanical" | "enrichment"; +export type KnowledgeOwnership = "scout_owned" | "derived" | "observed_source"; +export type KnowledgeFreshness = "fresh" | "stale" | "source_missing" | "unknown"; +export type KnowledgeScoreSource = "fts" | "vector" | "hybrid"; + +export type KnowledgeFacets = Record; + +export interface KnowledgeSourceAnchor { + sizeBytes?: number; + mtimeMs?: number; + contentHash?: string; +} + +export interface KnowledgePortablePath { + root: + | "HOME" + | "OPENSCOUT_CONTROL_HOME" + | "OPENSCOUT_SUPPORT_DIRECTORY" + | "PROJECT_ROOT" + | "ABSOLUTE"; + relPath: string; +} + +export type KnowledgeSourceRef = + | { + kind: "harness_transcript"; + harness: string; + path: KnowledgePortablePath; + sessionId?: string; + recordRange?: [number, number]; + byteRange?: [number, number]; + anchor?: KnowledgeSourceAnchor; + } + | { kind: "scout_record"; recordKind: string; id: string } + | { + kind: "skill"; + path: KnowledgePortablePath; + skillName?: string; + anchor?: KnowledgeSourceAnchor; + } + | { kind: "mcp_tool"; serverId: string; toolName: string; schemaPath?: string } + | { + kind: "file"; + path: KnowledgePortablePath; + lineRange?: [number, number]; + anchor?: KnowledgeSourceAnchor; + } + | { + kind: "context_pack"; + path: KnowledgePortablePath; + packId?: string; + schemaVersion?: string; + anchor?: KnowledgeSourceAnchor; + }; + +export interface 
KnowledgeCollection { + id: string; + kind: KnowledgeCollectionKind; + title: string; + sourceRefs: KnowledgeSourceRef[]; + qmdPath: string; + status: KnowledgeCollectionStatus; + contentHash: string; + extractorVersion: string; + chunkPolicyVersion: string; + createdAt: number; + updatedAt: number; + facets: KnowledgeFacets; +} + +export interface KnowledgeDocument { + id: string; + collectionId: string; + path: string; + kind: string; + origin: KnowledgeDocumentOrigin; + contentHash: string; + metadata?: Record; +} + +export interface KnowledgeChunk { + id: string; + collectionId: string; + documentId: string; + documentPath: string; + ordinal: number; + text: string; + textHash: string; + origin: KnowledgeDocumentOrigin; + ownership: KnowledgeOwnership; + sourceRefs: KnowledgeSourceRef[]; + facets: KnowledgeFacets; +} + +export interface KnowledgeSearchQuery { + q: string; + collections?: string[]; + sourceKinds?: KnowledgeCollectionKind[]; + facets?: KnowledgeFacets; + limit?: number; + mode?: "lexical" | "semantic" | "hybrid"; +} + +export type KnowledgeDrilldown = + | { kind: "qmd"; collectionId: string; documentPath: string; chunkId?: string } + | { kind: "harness_transcript"; sourceRef: Extract } + | { kind: "file"; sourceRef: Extract } + | { kind: "scout_record"; sourceRef: Extract } + | { kind: "mcp_tool"; sourceRef: Extract }; + +export interface KnowledgeSearchHit { + id: string; + collectionId: string; + documentId: string; + chunkId: string; + title: string; + snippet: string; + score: number; + scoreSource: KnowledgeScoreSource; + origin: KnowledgeDocumentOrigin; + ownership: KnowledgeOwnership; + freshness: KnowledgeFreshness; + sourceRefs: KnowledgeSourceRef[]; + drilldown: KnowledgeDrilldown[]; + facets: KnowledgeFacets; +} + +export interface KnowledgeIndexRequest { + source: Exclude; + days?: number; + collections?: string[]; + force?: boolean; + mode?: "foreground" | "background"; +} + +export type KnowledgeIndexJobState = + | "queued" + | 
"running" + | "waiting" + | "completed" + | "failed" + | "cancelled"; + +export interface KnowledgeIndexJob { + id: string; + source: KnowledgeIndexRequest["source"]; + state: KnowledgeIndexJobState; + leaseOwner?: string; + leaseGeneration: number; + progress: { + discovered?: number; + extracted?: number; + indexed?: number; + failed?: number; + }; + createdAt: number; + updatedAt: number; + completedAt?: number; + error?: string; +} + +export interface KnowledgeStatus { + generatedAt: number; + paths: { + knowledgeRoot: string; + qmdRoot: string; + sqlitePath: string; + }; + collections: number; + readyCollections: number; + chunks: number; + activeJobs: KnowledgeIndexJob[]; + sqliteBytes: number; +} diff --git a/packages/runtime/src/support-paths.ts b/packages/runtime/src/support-paths.ts index 886f0559..45a97458 100644 --- a/packages/runtime/src/support-paths.ts +++ b/packages/runtime/src/support-paths.ts @@ -16,6 +16,9 @@ export type OpenScoutSupportPaths = { managedInstallsPath: string; relayHubDirectory: string; controlHome: string; + knowledgeDirectory: string; + knowledgeQmdDirectory: string; + knowledgeSqlitePath: string; desktopStatusPath: string; workspaceStatePath: string; cutoverMarkerPath: string; @@ -30,6 +33,9 @@ export function resolveOpenScoutSupportPaths(): OpenScoutSupportPaths { const logsDirectory = join(supportDirectory, "logs"); const runtimeDirectory = join(supportDirectory, "runtime"); const catalogDirectory = join(supportDirectory, "catalog"); + const controlHome = process.env.OPENSCOUT_CONTROL_HOME + ?? join(home, ".openscout", "control-plane"); + const knowledgeDirectory = join(controlHome, "knowledge"); return { supportDirectory, @@ -45,8 +51,10 @@ export function resolveOpenScoutSupportPaths(): OpenScoutSupportPaths { managedInstallsPath: join(supportDirectory, "managed-installs.json"), relayHubDirectory: process.env.OPENSCOUT_RELAY_HUB ?? join(home, ".openscout", "relay"), - controlHome: process.env.OPENSCOUT_CONTROL_HOME - ?? 
join(home, ".openscout", "control-plane"), + controlHome, + knowledgeDirectory, + knowledgeQmdDirectory: join(knowledgeDirectory, "qmd"), + knowledgeSqlitePath: join(knowledgeDirectory, "knowledge.sqlite"), desktopStatusPath: join(supportDirectory, "agent-status.json"), workspaceStatePath: join(supportDirectory, "workspace-state.json"), cutoverMarkerPath: join(supportDirectory, OPENSCOUT_RPC_CUTOVER_MARKER), diff --git a/packages/scout-ios-core/Sources/ScoutIOSCore/BridgeBrokerClient.swift b/packages/scout-ios-core/Sources/ScoutIOSCore/BridgeBrokerClient.swift index fc9c65b7..bc691a12 100644 --- a/packages/scout-ios-core/Sources/ScoutIOSCore/BridgeBrokerClient.swift +++ b/packages/scout-ios-core/Sources/ScoutIOSCore/BridgeBrokerClient.swift @@ -185,6 +185,16 @@ public final class BridgeBrokerClient: ScoutBrokerClient, TerminalAccessProvidin } } + /// Recent activity from the broker's *curated* home feed (via the + /// `mobile/activity` procedure → `readScoutBrokerHome().activity`): one row per + /// message, name-resolved, always thread-linked. This is the orientation feed + /// Home renders; the raw lifecycle firehose stays on the Tail tab. 
+ public func recentActivity(limit: Int) async throws -> [TailEvent] { + let params = MobileActivityParams(limit: limit) + let wire: [MobileActivityItem] = try await connection.rpc("mobile/activity", params: params) + return wire.map { $0.toTailEvent() } + } + // MARK: - CommsCapability public func listConversations(kind: CommsConversation.Kind?, limit: Int) async throws -> [CommsConversation] { @@ -211,6 +221,13 @@ public final class BridgeBrokerClient: ScoutBrokerClient, TerminalAccessProvidin return result.messageId } + @discardableResult + public func markConversationRead(conversationId: String) async throws -> Int { + let params = MobileCommsMarkReadParams(conversationId: conversationId, lastReadMessageId: nil) + let result: MobileCommsMarkReadResult = try await connection.rpc("mobile/comms/read", params: params) + return result.unreadCount ?? 0 + } + // MARK: - TerminalAccessProviding public func provisionTerminalAccess(sshPublicKey: String) async throws -> TerminalAccess { @@ -321,6 +338,49 @@ struct MobileTerminalProvisionResult: Codable, Sendable { // MARK: - Listing wire shapes → contract summaries (best-effort mapping) +/// Input for `mobile.activity`. Only `limit` is sent from the phone — the other +/// server-side filters (agent/actor/conversation) stay unset for the fleet feed. +struct MobileActivityParams: Codable, Sendable { + let limit: Int +} + +/// Donor `ScoutBrokerHomeActivityRecord` (broker/service.ts), served via +/// `mobile/activity`. This is the broker's *curated* home feed — already deduped +/// to one row per message, name-resolved, and always thread-linked — so the phone +/// maps it straight onto a `TailEvent` with no substring guessing. The raw +/// `/v1/activity` lifecycle firehose lives on the Tail tab, not here. +struct MobileActivityItem: Codable, Sendable { + let id: String + let kind: String // "message" | "system" + let actorId: String + let actorName: String + let title: String + let detail: String? 
+ let conversationId: String? + let channel: String? + let timestamp: Int + + func toTailEvent() -> TailEvent { + TailEvent( + id: id, + tsMs: Int64(scoutEpochMilliseconds(timestamp)), + source: actorName, + harness: .unattributed, // curated activity carries no harness attribution + kind: mappedKind, + summary: title, + conversationId: conversationId?.trimmedNonEmpty + ) + } + + /// The curated feed gives an exact role, so there's no guessing: the operator's + /// own posts read as `.user`, an agent's as `.assistant`, broker notices as + /// `.system`. (This drives the row's dot color.) + private var mappedKind: TailEvent.Kind { + if kind == "system" { return .system } + return actorId == "operator" ? .user : .assistant + } +} + /// Donor `MobileSessionSummary` (RPC.swift). Mapped into `SessionSummary`. struct MobileSessionSummary: Codable, Sendable { let id: String @@ -433,6 +493,16 @@ struct MobileCommsSendResult: Codable, Sendable { let messageId: String } +struct MobileCommsMarkReadParams: Codable, Sendable { + let conversationId: String + var lastReadMessageId: String? +} + +struct MobileCommsMarkReadResult: Codable, Sendable { + let conversationId: String + let unreadCount: Int? +} + /// Donor `mobile/comms/conversations` row. Flattened by the broker (participants /// + last-author already resolved to display labels). Mapped into the contract. 
struct MobileCommsConversation: Codable, Sendable { diff --git a/packages/scout-ios-core/Sources/ScoutIOSCore/BridgeConnection.swift b/packages/scout-ios-core/Sources/ScoutIOSCore/BridgeConnection.swift index bfcd4e94..92ab183d 100644 --- a/packages/scout-ios-core/Sources/ScoutIOSCore/BridgeConnection.swift +++ b/packages/scout-ios-core/Sources/ScoutIOSCore/BridgeConnection.swift @@ -1076,7 +1076,8 @@ private struct WireTailEvent: Codable, Sendable { source: source, harness: mappedHarness, kind: mappedKind, - summary: summary + summary: summary, + conversationId: sessionId.trimmedNonEmpty ) } } diff --git a/packages/scout-ios-core/Sources/ScoutIOSCore/RPCWire.swift b/packages/scout-ios-core/Sources/ScoutIOSCore/RPCWire.swift index 303c5cfd..3467be36 100644 --- a/packages/scout-ios-core/Sources/ScoutIOSCore/RPCWire.swift +++ b/packages/scout-ios-core/Sources/ScoutIOSCore/RPCWire.swift @@ -37,6 +37,7 @@ struct TRPCRoute: Sendable { let trpcRouteMap: [String: TRPCRoute] = [ "mobile/sessions": TRPCRoute(path: "mobile.sessions", method: .query), "mobile/agents": TRPCRoute(path: "mobile.agents", method: .query), + "mobile/activity": TRPCRoute(path: "mobile.activity", method: .query), "mobile/session/snapshot": TRPCRoute(path: "mobile.sessionSnapshot", method: .query), "mobile/message/send": TRPCRoute(path: "mobile.sendMessage", method: .mutation), "mobile/session/create": TRPCRoute(path: "mobile.createSession", method: .mutation), @@ -46,6 +47,7 @@ let trpcRouteMap: [String: TRPCRoute] = [ "mobile/comms/conversations": TRPCRoute(path: "mobile.commsConversations", method: .query), "mobile/comms/messages": TRPCRoute(path: "mobile.commsMessages", method: .query), "mobile/comms/send": TRPCRoute(path: "mobile.commsSend", method: .mutation), + "mobile/comms/read": TRPCRoute(path: "mobile.commsMarkRead", method: .mutation), "mobile/terminal/provision": TRPCRoute(path: "mobile.terminalProvision", method: .mutation), ] diff --git 
a/packages/scout-native-core/Sources/ScoutCapabilities/Comms.swift b/packages/scout-native-core/Sources/ScoutCapabilities/Comms.swift index 763b2a26..eed312e2 100644 --- a/packages/scout-native-core/Sources/ScoutCapabilities/Comms.swift +++ b/packages/scout-native-core/Sources/ScoutCapabilities/Comms.swift @@ -111,4 +111,11 @@ public protocol CommsCapability: Sendable { /// post under an existing message. Returns the new message id. @discardableResult func postMessage(conversationId: String, body: String, replyTo: String?) async throws -> String + + /// Mark a conversation read — advances the operator's read cursor on the + /// broker through the latest message, clearing the unread badge. Returns the + /// resulting unread count (0 when caught up). Opening a thread should call + /// this so the count doesn't linger forever. + @discardableResult + func markConversationRead(conversationId: String) async throws -> Int } diff --git a/packages/scout-native-core/Sources/ScoutCapabilities/Tail.swift b/packages/scout-native-core/Sources/ScoutCapabilities/Tail.swift index 379f142e..662ea082 100644 --- a/packages/scout-native-core/Sources/ScoutCapabilities/Tail.swift +++ b/packages/scout-native-core/Sources/ScoutCapabilities/Tail.swift @@ -27,6 +27,9 @@ public struct TailEvent: Codable, Sendable, Identifiable, Equatable { public var harness: Harness public var kind: Kind public var summary: String + /// The conversation/session this event belongs to, when known — lets a row + /// tap through to where it happened. nil for events with no thread linkage. + public var conversationId: String? public init( id: String, @@ -34,7 +37,8 @@ public struct TailEvent: Codable, Sendable, Identifiable, Equatable { source: Source, harness: Harness = .unattributed, kind: Kind = .other, - summary: String + summary: String, + conversationId: String? 
= nil ) { self.id = id self.tsMs = tsMs @@ -42,6 +46,7 @@ public struct TailEvent: Codable, Sendable, Identifiable, Equatable { self.harness = harness self.kind = kind self.summary = summary + self.conversationId = conversationId } } @@ -49,4 +54,15 @@ public struct TailEvent: Codable, Sendable, Identifiable, Equatable { /// cursor (ms epoch); the transport delivers events as they arrive. public protocol TailCapability: Sendable { func tailEvents(since: Int64?) -> AsyncStream + + /// Recent activity history (newest-first), for *seeding* a view before the + /// live `tailEvents` stream takes over. `tailEvents` only delivers events + /// that arrive after subscription, so a freshly-opened surface shows nothing + /// without this backfill. Conformers without a history source get the + /// default empty list. + func recentActivity(limit: Int) async throws -> [TailEvent] +} + +public extension TailCapability { + func recentActivity(limit: Int) async throws -> [TailEvent] { [] } } diff --git a/packages/web/client/OpenScoutAppShell.tsx b/packages/web/client/OpenScoutAppShell.tsx index 0a496b8c..e346f5a5 100644 --- a/packages/web/client/OpenScoutAppShell.tsx +++ b/packages/web/client/OpenScoutAppShell.tsx @@ -24,6 +24,7 @@ const SIDE_PANEL_MIN_WIDTH = 240; const SIDE_PANEL_MAX_WIDTH_HARD_CAP = 900; const SIDE_PANEL_MAX_WIDTH_VIEWPORT_RATIO = 0.45; const SIDE_PANEL_MAX_WIDTH_FLOOR = 500; +const SEARCH_RIGHT_PANEL_MIN_WIDTH = 420; // Cap at 45% of viewport, floored at 500 so small screens still get usable inspector. function computeSidePanelMaxWidth(viewportWidth: number) { @@ -111,6 +112,7 @@ function OpenScoutAppShellInner({ app, assistantEnabled }: { app: HudsonApp; ass const [sidePanelMaxWidth, setSidePanelMaxWidth] = useState(() => computeSidePanelMaxWidth(typeof window !== "undefined" ? 
window.innerWidth : 1280), ); + const isSearchRoute = typeof window !== "undefined" && window.location.pathname === "/search"; useEffect(() => { const update = () => setSidePanelMaxWidth(computeSidePanelMaxWidth(window.innerWidth)); @@ -124,6 +126,11 @@ function OpenScoutAppShellInner({ app, assistantEnabled }: { app: HudsonApp; ass setRightWidth((current) => Math.min(sidePanelMaxWidth, Math.max(SIDE_PANEL_MIN_WIDTH, current))); }, [sidePanelMaxWidth, setLeftWidth, setRightWidth]); + useEffect(() => { + if (!isSearchRoute || rightCollapsed || rightOverlay) return; + setRightWidth((current) => Math.max(current, Math.min(sidePanelMaxWidth, SEARCH_RIGHT_PANEL_MIN_WIDTH))); + }, [isSearchRoute, rightCollapsed, rightOverlay, setRightWidth, sidePanelMaxWidth]); + const [panOffset, setPanOffset] = useState({ x: 0, y: 0 }); const [scale, setScale] = useState(1); diff --git a/packages/web/client/lib/knowledge-search.ts b/packages/web/client/lib/knowledge-search.ts new file mode 100644 index 00000000..6b21368d --- /dev/null +++ b/packages/web/client/lib/knowledge-search.ts @@ -0,0 +1,184 @@ +export type KnowledgeStatus = { + generatedAt: number; + paths: { + knowledgeRoot: string; + qmdRoot: string; + sqlitePath: string; + }; + collections: number; + readyCollections: number; + chunks: number; + activeJobs: Array<{ + id: string; + source: string; + state: string; + progress: { + discovered?: number; + extracted?: number; + indexed?: number; + failed?: number; + }; + updatedAt: number; + error?: string; + }>; + sqliteBytes: number; +}; + +export type PortablePath = { + root: string; + relPath: string; +}; + +export type KnowledgeSourceRef = + | { + kind: "harness_transcript"; + harness: string; + path: PortablePath; + sessionId?: string; + recordRange?: [number, number]; + } + | { kind: "file"; path: PortablePath; lineRange?: [number, number] } + | { kind: "skill"; path: PortablePath; skillName?: string } + | { kind: "context_pack"; path: PortablePath; packId?: string } + | { 
kind: "scout_record"; recordKind: string; id: string } + | { kind: "mcp_tool"; serverId: string; toolName: string; schemaPath?: string }; + +export type KnowledgeHit = { + id: string; + collectionId: string; + documentId: string; + chunkId: string; + title: string; + snippet: string; + score: number; + scoreSource: string; + origin: string; + ownership: string; + freshness: string; + sourceRefs: KnowledgeSourceRef[]; + facets: Record; +}; + +export type SearchResponse = { + q: string; + hits: KnowledgeHit[]; + status: KnowledgeStatus; +}; + +export type IndexedSession = { + collectionId: string; + title: string; + harness: string; + project: string; + transcriptPath: string; + qmdPath: string; + records: number; + documents: number; + chunks: number; + bytes: number; + mtimeMs: number; + skipped?: boolean; + error?: string; +}; + +export type IndexResponse = { + result: { + days: number; + discovered: number; + indexed: number; + failed: number; + sessions: IndexedSession[]; + }; + status: KnowledgeStatus; +}; + +export type KnowledgeSourcePreviewRecord = { + index: number; + raw: string; + type?: string; + role?: string; + kind?: string; + summary: string; + renderedText: string; + parsed: boolean; + matched?: boolean; + matchCount?: number; + matchTerms?: string[]; +}; + +export type KnowledgeSourcePreview = { + path: string; + sourcePath: PortablePath; + harness: string; + sessionId?: string; + requestedRange?: [number, number]; + previewRange: [number, number]; + records: KnowledgeSourcePreviewRecord[]; + recordsRead: number; + truncatedBefore: boolean; + truncatedAfter: boolean; + query?: string; + queryTerms?: string[]; +}; + +export type HighlightPart = { + text: string; + match: boolean; +}; + +export function pathLabel(path: PortablePath): string { + if (path.root === "HOME") return `~/${path.relPath}`; + if (path.root === "ABSOLUTE") return path.relPath; + return `$${path.root}/${path.relPath}`; +} + +export function firstTranscriptRef(hit: KnowledgeHit): 
Extract | null { + return hit.sourceRefs.find((ref): ref is Extract => + ref.kind === "harness_transcript" + ) ?? null; +} + +export function facetText(hit: KnowledgeHit, key: string): string { + const value = hit.facets[key]; + if (Array.isArray(value)) return value.filter(Boolean).join(", "); + return typeof value === "string" ? value : ""; +} + +export function queryTerms(query: string): string[] { + const seen = new Set(); + return query + .split(/[^A-Za-z0-9_./-]+/u) + .map((term) => term.trim()) + .filter((term) => term.length > 1) + .filter((term) => { + const key = term.toLowerCase(); + if (seen.has(key)) return false; + seen.add(key); + return true; + }) + .slice(0, 12); +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +export function highlightParts(text: string, query: string): HighlightPart[] { + const terms = queryTerms(query); + if (terms.length === 0 || text.length === 0) return [{ text, match: false }]; + const regex = new RegExp(`(${terms.map(escapeRegExp).join("|")})`, "giu"); + const parts: HighlightPart[] = []; + let cursor = 0; + for (const match of text.matchAll(regex)) { + const index = match.index ?? 0; + if (index > cursor) { + parts.push({ text: text.slice(cursor, index), match: false }); + } + parts.push({ text: match[0], match: true }); + cursor = index + match[0].length; + } + if (cursor < text.length) { + parts.push({ text: text.slice(cursor), match: false }); + } + return parts.length > 0 ? 
parts : [{ text, match: false }]; +} diff --git a/packages/web/client/scout/Provider.tsx b/packages/web/client/scout/Provider.tsx index f9d1f6ff..275092a0 100644 --- a/packages/web/client/scout/Provider.tsx +++ b/packages/web/client/scout/Provider.tsx @@ -24,6 +24,7 @@ import { ScoutbotStateProvider } from "./scoutbot/ScoutbotStateContext.tsx"; import { SettingsDrawer } from "../screens/SettingsDrawer.tsx"; import type { Agent, BrokerRouteAttempt, Route } from "../lib/types.ts"; import type { ScoutTheme } from "../lib/theme.ts"; +import type { KnowledgeHit } from "../lib/knowledge-search.ts"; declare global { interface Window { @@ -80,6 +81,11 @@ export interface ScoutContextValue { inspectBrokerAttempt: (attempt: BrokerRouteAttempt) => void; clearBrokerAttempt: () => void; + selectedKnowledgeHit: KnowledgeHit | null; + selectedKnowledgeQuery: string; + inspectKnowledgeHit: (hit: KnowledgeHit, query?: string) => void; + clearKnowledgeHit: () => void; + openFilePreview: (path: string) => void; closeFilePreview: () => void; } @@ -203,12 +209,24 @@ export function ScoutProvider({ const [onboardingSkipped, setOnboardingSkipped] = useState(false); const [settingsOpen, setSettingsOpen] = useState(false); const [selectedBrokerAttempt, setSelectedBrokerAttempt] = useState(null); + const [selectedKnowledgeHit, setSelectedKnowledgeHit] = useState(null); + const [selectedKnowledgeQuery, setSelectedKnowledgeQuery] = useState(""); const openSettings = useCallback(() => setSettingsOpen(true), []); const closeSettings = useCallback(() => setSettingsOpen(false), []); const inspectBrokerAttempt = useCallback((attempt: BrokerRouteAttempt) => { setSelectedBrokerAttempt(attempt); }, []); const clearBrokerAttempt = useCallback(() => setSelectedBrokerAttempt(null), []); + const inspectKnowledgeHit = useCallback((hit: KnowledgeHit, query?: string) => { + setSelectedKnowledgeHit(hit); + if (typeof query === "string") { + setSelectedKnowledgeQuery(query.trim()); + } + }, []); + const 
clearKnowledgeHit = useCallback(() => { + setSelectedKnowledgeHit(null); + setSelectedKnowledgeQuery(""); + }, []); const themeVars = initialTheme === "light" ? LIGHT_THEME_VARS : DARK_THEME_VARS; const scoutbotAgentId = useMemo(() => resolveScoutbotAgentId(agents), [agents]); const scoutbotDmConversationId = useMemo(() => scoutbotConversationId(scoutbotAgentId), [scoutbotAgentId]); @@ -350,6 +368,7 @@ export function ScoutProvider({ settingsOpen, openSettings, closeSettings, scoutbotAgentId, scoutbotConversationId: scoutbotDmConversationId, applyScoutbotUiAction, selectedBrokerAttempt, inspectBrokerAttempt, clearBrokerAttempt, + selectedKnowledgeHit, selectedKnowledgeQuery, inspectKnowledgeHit, clearKnowledgeHit, openFilePreview, closeFilePreview, }), [ @@ -358,6 +377,7 @@ export function ScoutProvider({ settingsOpen, openSettings, closeSettings, scoutbotAgentId, scoutbotDmConversationId, applyScoutbotUiAction, selectedBrokerAttempt, inspectBrokerAttempt, clearBrokerAttempt, + selectedKnowledgeHit, selectedKnowledgeQuery, inspectKnowledgeHit, clearKnowledgeHit, openFilePreview, closeFilePreview, ], ); diff --git a/packages/web/client/scout/slots/Inspector.tsx b/packages/web/client/scout/slots/Inspector.tsx index 986fc5fe..9e9c1a62 100644 --- a/packages/web/client/scout/slots/Inspector.tsx +++ b/packages/web/client/scout/slots/Inspector.tsx @@ -16,6 +16,7 @@ import { WorkInspector } from "../inspector/WorkInspector.tsx"; import { MeshInspectorPanel } from "../inspector/MeshInspector.tsx"; import { ScoutbotPanel } from "../scoutbot/ScoutbotPanel.tsx"; import { BrokerAttemptInspector } from "../../screens/BrokerScreen.tsx"; +import { KnowledgeSearchInspector } from "../../screens/KnowledgeSearchInspector.tsx"; import { usePersistentBoolean, usePersistentNumber } from "../../lib/persistent-state.ts"; import { VerticalResizeHandle } from "./VerticalResizeHandle.tsx"; import type { Agent, AgentRun, FleetAsk, FleetAttentionItem, FleetState, OpsMode, Route, WorkItem } 
from "../../lib/types.ts"; @@ -96,6 +97,9 @@ export function ScoutInspector() { case "sessions": content = ; break; + case "search": + content = ; + break; case "conversation": content = ; break; diff --git a/packages/web/client/screens/KnowledgeSearchInspector.tsx b/packages/web/client/screens/KnowledgeSearchInspector.tsx new file mode 100644 index 00000000..7b16fa7f --- /dev/null +++ b/packages/web/client/screens/KnowledgeSearchInspector.tsx @@ -0,0 +1,500 @@ +import "./knowledge-search.css"; + +import { useEffect, useMemo, useState } from "react"; +import { + CheckCircle2, + Clock3, + Database, + ExternalLink, + FileJson, + Loader2, + MessageSquareText, + RefreshCw, + Waypoints, + X, +} from "lucide-react"; + +import { api } from "../lib/api.ts"; +import { + facetText, + firstTranscriptRef, + highlightParts, + pathLabel, + queryTerms, + type IndexResponse, + type KnowledgeSourcePreview, + type KnowledgeSourcePreviewRecord, + type KnowledgeStatus, +} from "../lib/knowledge-search.ts"; +import { useScout } from "../scout/Provider.tsx"; + +type InspectorTab = "preview" | "indexer"; + +function formatBytes(bytes: number): string { + if (!Number.isFinite(bytes) || bytes <= 0) return "0 B"; + const units = ["B", "KB", "MB", "GB"]; + let value = bytes; + let unit = 0; + while (value >= 1024 && unit < units.length - 1) { + value /= 1024; + unit++; + } + return `${value >= 10 || unit === 0 ? value.toFixed(0) : value.toFixed(1)} ${units[unit]}`; +} + +function formatCount(value: number): string { + return new Intl.NumberFormat("en-US").format(value || 0); +} + +function formatTime(ms: number | undefined): string { + if (!ms) return "unknown"; + return new Date(ms).toLocaleString([], { + month: "short", + day: "numeric", + hour: "numeric", + minute: "2-digit", + }); +} + +function HighlightedText({ text, query }: { text: string; query: string }) { + return ( + <> + {highlightParts(text, query).map((part, index) => + part.match ? 
{part.text} : {part.text} + )} + + ); +} + +function scoreLabel(value: number): string { + return Number.isFinite(value) ? value.toFixed(3) : "0.000"; +} + +function matchedFields(input: { + title: string; + snippet: string; + preview: KnowledgeSourcePreview | null; + query: string; +}): string[] { + const terms = queryTerms(input.query); + if (terms.length === 0) return []; + const containsTerm = (text: string) => { + const lower = text.toLowerCase(); + return terms.some((term) => lower.includes(term.toLowerCase())); + }; + const fields: string[] = []; + if (containsTerm(input.title)) fields.push("title"); + if (containsTerm(input.snippet)) fields.push("indexed snippet"); + if (input.preview?.records.some((record) => record.matched)) fields.push("source records"); + return fields; +} + +function recordText(record: KnowledgeSourcePreviewRecord): string { + return record.renderedText || record.summary || record.raw; +} + +function recordKind(record: KnowledgeSourcePreviewRecord): string { + return record.kind || record.role || record.type || "record"; +} + +function recordKindLabel(record: KnowledgeSourcePreviewRecord): string { + const kind = recordKind(record); + if (kind === "assistant_turn" || kind === "assistant") return "assistant"; + if (kind === "user_turn" || kind === "user") return "user"; + if (kind === "system_record" || kind === "system") return "system"; + if (kind === "command_or_tool" || kind === "tool_use") return "tool"; + if (kind === "response_item") return "tool output"; + return kind; +} + +function recordPriority(record: KnowledgeSourcePreviewRecord): number { + const kind = recordKind(record); + if ( + kind === "assistant" + || kind === "assistant_turn" + || kind === "user" + || kind === "user_turn" + || kind === "last-prompt" + || kind === "ai-title" + ) { + return 0; + } + if (kind === "system" || kind === "system_record") return 1; + return 2; +} + +export function KnowledgeSearchInspector() { + const { + selectedKnowledgeHit, + 
selectedKnowledgeQuery, + clearKnowledgeHit, + openFilePreview, + } = useScout(); + const [tab, setTab] = useState("preview"); + const [status, setStatus] = useState(null); + const [preview, setPreview] = useState(null); + const [lastRun, setLastRun] = useState(null); + const [loadingPreview, setLoadingPreview] = useState(false); + const [indexing, setIndexing] = useState(false); + const [error, setError] = useState(null); + + const transcript = selectedKnowledgeHit ? firstTranscriptRef(selectedKnowledgeHit) : null; + const project = selectedKnowledgeHit ? facetText(selectedKnowledgeHit, "project") : ""; + const harness = selectedKnowledgeHit ? facetText(selectedKnowledgeHit, "harness") : ""; + const activeQuery = selectedKnowledgeQuery.trim(); + const activeJob = status?.activeJobs[0] ?? null; + const matchedRecords = useMemo(() => + preview?.records + .filter((record) => record.matched) + .sort((left, right) => { + const priority = recordPriority(left) - recordPriority(right); + if (priority !== 0) return priority; + return (right.matchCount ?? 0) - (left.matchCount ?? 0); + }) + .slice(0, 4) ?? [], + [preview], + ); + const fields = selectedKnowledgeHit + ? matchedFields({ + title: selectedKnowledgeHit.title, + snippet: selectedKnowledgeHit.snippet, + preview, + query: activeQuery, + }) + : []; + const queryTermLabels = queryTerms(activeQuery); + const firstOpenRecord = preview?.records.find((record) => record.matched)?.index ?? transcript?.recordRange?.[0]; + + const metrics = useMemo(() => [ + { label: "Collections", value: formatCount(status?.readyCollections ?? 0) }, + { label: "Chunks", value: formatCount(status?.chunks ?? 0) }, + { label: "Index", value: formatBytes(status?.sqliteBytes ?? 0) }, + { label: "Updated", value: status ? formatTime(status.generatedAt) : "loading" }, + ], [status]); + + const refreshStatus = async () => { + try { + setError(null); + setStatus(await api("/api/knowledge/status")); + } catch (err) { + setError(err instanceof Error ? 
err.message : String(err)); + } + }; + + useEffect(() => { + void refreshStatus(); + }, []); + + useEffect(() => { + if (!selectedKnowledgeHit) return; + setTab("preview"); + }, [selectedKnowledgeHit?.id]); + + useEffect(() => { + let cancelled = false; + const loadPreview = async () => { + if (!selectedKnowledgeHit || !transcript) { + setPreview(null); + return; + } + setLoadingPreview(true); + try { + setError(null); + const response = await api("/api/knowledge/source-preview", { + method: "POST", + body: JSON.stringify({ + sourceRef: transcript, + contextRecords: 4, + maxRecords: 80, + q: activeQuery, + }), + }); + if (!cancelled) setPreview(response); + } catch (err) { + if (!cancelled) { + setPreview(null); + setError(err instanceof Error ? err.message : String(err)); + } + } finally { + if (!cancelled) setLoadingPreview(false); + } + }; + void loadPreview(); + return () => { + cancelled = true; + }; + }, [selectedKnowledgeHit?.id, transcript?.recordRange?.[0], transcript?.recordRange?.[1], activeQuery]); + + const buildIndex = async (force = false) => { + setIndexing(true); + try { + setError(null); + const response = await api("/api/knowledge/sessions/index", { + method: "POST", + body: JSON.stringify({ days: 3, limit: 260, force }), + }); + setLastRun(response.result); + setStatus(response.status); + setTab("indexer"); + } catch (err) { + setError(err instanceof Error ? err.message : String(err)); + } finally { + setIndexing(false); + } + }; + + const openTranscript = () => { + if (!transcript) return; + openFilePreview(pathLabel(transcript.path)); + }; + + return ( +
+
+ + +
+ + {error &&
{error}
} + + {tab === "preview" ? ( +
+ {!selectedKnowledgeHit ? ( +
+
+ ) : ( + <> +
+
+ Selected result +

{selectedKnowledgeHit.title}

+
+ +
+ +
+ {project && {project}} + {harness && {harness}} + {transcript?.recordRange && records {transcript.recordRange[0]}..{transcript.recordRange[1]}} +
+ + {matchedRecords.length > 0 && ( +
+
+
+ {matchedRecords.map((record) => ( +
+
+ {String(record.index).padStart(4, "0")} + {recordKindLabel(record)} + {record.matchCount ?? 0} match{record.matchCount === 1 ? "" : "es"} +
+

+
+ ))} +
+ )} + +
+ Indexed snippet +

+
+ +
+
+
+
+
+ Index rank + {scoreLabel(selectedKnowledgeHit.score)} + lower values sort earlier in lexical search +
+
+ Matched terms + {queryTermLabels.length > 0 ? queryTermLabels.join(", ") : "none"} + {activeQuery ? `query "${activeQuery}"` : "no active query captured"} +
+
+ Matched in + {fields.length > 0 ? fields.join(", ") : "indexed chunk"} + {selectedKnowledgeHit.scoreSource === "fts" ? "lexical index over QMD title/body" : `${selectedKnowledgeHit.scoreSource} over QMD title/body`} +
+
+
+ + {transcript && ( +
+ Transcript + {pathLabel(transcript.path)} + +
+ )} + + {loadingPreview ? ( +
+
+ ) : preview ? ( +
+
+ Raw JSONL evidence + + records {preview.previewRange[0]}..{preview.previewRange[1]} + {preview.truncatedBefore ? " · earlier records hidden" : ""} + {preview.truncatedAfter ? " · later records hidden" : ""} + +
+ {preview.records.map((record) => ( +
+ + {String(record.index).padStart(4, "0")} + {record.kind || record.role || record.type || "record"} + + +
{record.raw}
+
+ ))} +
+ ) : ( +
+ No JSONL preview is available for this result. +
+ )} + + )} +
+ ) : ( +
+
+
+ Indexer +

Three-day session pipeline

+
+ +
+ +
+ + +
+ +
+ {metrics.map((metric) => ( +
+ {metric.label} + {metric.value} +
+ ))} +
+ +
+
+ Frequency + Manual refresh +
+
+ Window + Last 3 days, up to 260 sessions +
+
+ Embedding stage + Optional next phase +
+
+ +
+
+
+
+
+
+
+
+
+
+ + {activeJob && ( +
+ {activeJob.source} {activeJob.state} + + {formatCount(activeJob.progress.indexed ?? 0)} indexed of {formatCount(activeJob.progress.discovered ?? 0)} discovered + +
+ )} + + {lastRun && ( +
+ Last run + + {formatCount(lastRun.indexed)} indexed, {formatCount(lastRun.failed)} failed, {formatCount(lastRun.discovered)} discovered + +
+ )} + +
+
+ QMD root + {status?.paths.qmdRoot ?? "not initialized"} +
+
+ SQLite + {status?.paths.sqlitePath ?? "not initialized"} +
+
+
+ )} +
+ ); +} diff --git a/packages/web/client/screens/KnowledgeSearchScreen.tsx b/packages/web/client/screens/KnowledgeSearchScreen.tsx index 1c62fd5e..9ba92d26 100644 --- a/packages/web/client/screens/KnowledgeSearchScreen.tsx +++ b/packages/web/client/screens/KnowledgeSearchScreen.tsx @@ -1,657 +1,275 @@ import "./knowledge-search.css"; -import { useMemo, useState } from "react"; +import { FormEvent, useEffect, useState } from "react"; import { - Archive, - Bot, - CheckCircle2, - Clock3, Database, FileSearch, - GitBranch, - Layers3, - MessageSquareText, + Loader2, Search, - SlidersHorizontal, - Sparkles, - Waypoints, } from "lucide-react"; -import type { LucideIcon } from "lucide-react"; import type { Route } from "../lib/types.ts"; +import { api } from "../lib/api.ts"; +import { useScout } from "../scout/Provider.tsx"; +import { + facetText, + firstTranscriptRef, + highlightParts, + pathLabel, + queryTerms, + type KnowledgeHit, + type KnowledgeStatus, + type SearchResponse, +} from "../lib/knowledge-search.ts"; + +const SAMPLE_QUERIES = [ + "QMD", + "embeddings", + "search surface", + "raw log drilldown", + "MCP", + "context pack", + "API", +]; -type ExtractionModeId = "mechanical" | "standard" | "deep"; - -type ExtractionMode = { - id: ExtractionModeId; - label: string; - summary: string; - llmUse: string; - output: string; -}; - -type PipelineStage = { - id: string; - label: string; - status: string; - detail: string; - icon: LucideIcon; -}; - -type SearchHit = { - title: string; - location: string; - score: string; - snippet: string; - source: string; -}; - -type SearchScenario = { - id: string; - query: string; - intent: string; - answer: string; - hits: SearchHit[]; -}; +function formatCount(value: number): string { + return new Intl.NumberFormat("en-US").format(value || 0); +} -const SESSION_PATH = - "/Users/arach/.codex/sessions/2026/05/30/rollout-2026-05-30T11-25-39-019e797d-ab15-7823-b322-4434c5831317.jsonl"; +function useKnowledgeStatus() { + const [status, 
setStatus] = useState(null); + const [error, setError] = useState(null); -const SIDE_CAR_ROOT = "/tmp/openscout-qmd-e2e/docs"; + const refresh = async () => { + try { + setError(null); + setStatus(await api("/api/knowledge/status")); + } catch (err) { + setError(err instanceof Error ? err.message : String(err)); + } + }; -const EXTRACTION_MODES: ExtractionMode[] = [ - { - id: "mechanical", - label: "Mechanical", - summary: "Chunk, normalize, attach source refs, and index immediately.", - llmUse: "No LLM pass", - output: "raw event docs + tool-call catalog", - }, - { - id: "standard", - label: "Summary", - summary: "Add compact decision, file, problem, and next-action summaries before indexing.", - llmUse: "Small summary pass", - output: "QMD-ready docs + topic summaries", - }, - { - id: "deep", - label: "Deep", - summary: "Run focused extraction against a declared interest set before building the index.", - llmUse: "Selective LLM pass", - output: "curated knowledge pack + raw refs", - }, -]; + useEffect(() => { + void refresh(); + }, []); -const PIPELINE: PipelineStage[] = [ - { - id: "select", - label: "Session Set", - status: "curated", - detail: "The user chooses sessions worth remembering instead of importing every harness log.", - icon: Archive, - }, - { - id: "extract", - label: "Extraction", - status: "derived", - detail: "QMD-style markdown docs are produced from observed transcripts with stable source refs.", - icon: Layers3, - }, - { - id: "index", - label: "Fuzzy Index", - status: "rebuildable", - detail: "FTS/fuzzy search targets the derived corpus; vectors can be added later, not required first.", - icon: Database, - }, - { - id: "talk", - label: "LLM Conversation", - status: "assisted", - detail: "The assistant works over extracted knowledge first, keeping answers cheap and directed.", - icon: MessageSquareText, - }, - { - id: "drilldown", - label: "Raw Drilldown", - status: "anchored", - detail: "When confidence matters, jump back to the exact JSONL 
source and event-level context.", - icon: FileSearch, - }, -]; + return { status, setStatus, error, setError, refresh }; +} -const METRICS = [ - { label: "Source sessions", value: "1", detail: "Codex JSONL" }, - { label: "Observed events", value: "310", detail: "source material" }, - { label: "Derived docs", value: "13", detail: "markdown files" }, - { label: "Index size", value: "548 KB", detail: "SQLite / FTS" }, - { label: "Vectors", value: "0", detail: "lexical first" }, -]; +function HighlightedText({ text, query }: { text: string; query: string }) { + return ( + <> + {highlightParts(text, query).map((part, index) => + part.match ? {part.text} : {part.text} + )} + + ); +} -const SEARCH_SCENARIOS: SearchScenario[] = [ - { - id: "drilldown", - query: "which session discussed raw log drilldown?", - intent: "Find the strategy conversation before opening the transcript.", - answer: - "This session is about a two-step session-knowledge flow: summarize/extract into a search corpus, then use retrieval to jump back to raw logs only when needed.", - hits: [ - { - title: "events-07.md", - location: "line 133", - score: "BM25 0.91", - snippet: - "semantic conversation first, raw-log drilldown second", - source: "qmd://scout-session/events-07.md:133", - }, - { - title: "overview.md", - location: "line 18", - score: "BM25 0.77", - snippet: - "session knowledge collections, QMD store creation, and event-level lookup", - source: "qmd://scout-session/overview.md:18", - }, - ], - }, - { - id: "policy", - query: "QMD store extraction policy", - intent: "Separate QMD's store mechanics from Scout's extraction policy.", - answer: - "QMD provides the markdown store and search mechanics. 
Scout would own the pre-index extraction policy: decisions, files, errors, next actions, and source coordinates.", - hits: [ - { - title: "events-09.md", - location: "line 38", - score: "BM25 0.88", - snippet: - "store creation is mostly index/database setup", - source: "qmd://scout-session/events-09.md:38", - }, - { - title: "tool-calls.md", - location: "line 74", - score: "BM25 0.65", - snippet: - "collection add, context add, update, search, and get verified the store loop", - source: "qmd://scout-session/tool-calls.md:74", - }, - ], - }, - { - id: "logs", - query: "extract knowledge and make fast search from logs", - intent: "Recover the source idea and the concrete search shape.", - answer: - "The useful pattern is not bulk transcript import. It is a derived knowledge set with line-addressable docs, fuzzy search, freshness metadata, and a raw-log escape hatch.", - hits: [ - { - title: "overview.md", - location: "line 7", - score: "BM25 0.83", - snippet: - "extract knowledge from logs, build fast search, then converse over the resulting dataset", - source: "qmd://scout-session/overview.md:7", - }, - { - title: "events-05.md", - location: "line 92", - score: "BM25 0.72", - snippet: - "user-curated session sets become indexed knowledge collections", - source: "qmd://scout-session/events-05.md:92", - }, - ], - }, -]; +function compactPath(label: string): string { + const normalized = label.replace(/\\/g, "/"); + if (!normalized.includes("/")) return normalized; + const parts = normalized.split("/").filter(Boolean); + const file = parts.at(-1) ?? normalized; + if (normalized.startsWith("~/")) return `~/.../${file}`; + if (normalized.startsWith("/")) return `/.../${file}`; + return parts.length > 2 ? `${parts[0]}/.../${file}` : normalized; +} -const DOC_ROWS = [ - { name: "overview.md", kind: "summary", weight: "high", refs: "session + topics" }, - { name: "tool-calls.md", kind: "catalog", weight: "medium", refs: "commands + outputs" }, - { name: "events-01.md ... 
events-11.md", kind: "chunks", weight: "source", refs: "event windows" }, -]; +function matchedTermsForHit(hit: KnowledgeHit, query: string): string[] { + const title = hit.title.toLowerCase(); + const snippet = hit.snippet.toLowerCase(); + return queryTerms(query).filter((term) => { + const lower = term.toLowerCase(); + return title.includes(lower) || snippet.includes(lower); + }); +} -const WEEKLY_SCOPE_ROWS = [ - { label: "Codex sessions", value: "78", detail: "191 MiB raw JSONL" }, - { label: "Claude main", value: "72", detail: "228 MiB raw JSONL" }, - { label: "Claude subagents", value: "114", detail: "56 MiB raw JSONL" }, - { label: "Claude history", value: "1", detail: "13 MiB raw JSONL" }, - { label: "All observed", value: "266", detail: "489 MiB this week" }, -]; +function rankReason(hit: KnowledgeHit, query: string): string { + const terms = matchedTermsForHit(hit, query); + if (terms.length === 1) return `Matched "${terms[0]}" in indexed QMD`; + if (terms.length > 1) return `Matched ${terms.length} query terms in indexed QMD`; + return "Matched indexed QMD session knowledge"; +} -const SAMPLE_SESSION_ROWS = [ - { - harness: "Codex", - tier: "large", - size: "13.0 MiB", - events: "4,220", - rawEstimate: "~3.4M raw-token eq.", - modified: "2026-05-29 23:29", - path: "~/.codex/sessions/2026/05/29/...019e75fd-a431...jsonl", - }, - { - harness: "Codex", - tier: "normal", - size: "1.1 MiB", - events: "494", - rawEstimate: "~289k raw-token eq.", - modified: "2026-05-25 15:45", - path: "~/.codex/sessions/2026/05/25/...019e609c-4389...jsonl", - }, - { - harness: "Codex", - tier: "small", - size: "34 KiB", - events: "12", - rawEstimate: "~9k raw-token eq.", - modified: "2026-05-29 00:16", - path: "~/.codex/sessions/2026/05/29/...019e71f2-958c...jsonl", - }, - { - harness: "Claude", - tier: "large", - size: "52.9 MiB", - events: "12,009", - rawEstimate: "~13.9M raw-token eq.", - modified: "2026-05-26 02:49", - path: 
"~/.claude/projects/-Users-arach-dev-openscout/a00198bf...jsonl", - }, - { - harness: "Claude", - tier: "normal", - size: "745 KiB", - events: "252", - rawEstimate: "~191k raw-token eq.", - modified: "2026-05-23 13:11", - path: "~/.claude/projects/-Users-arach-dev-openscout/c680a795...jsonl", - }, - { - harness: "Claude", - tier: "small", - size: "2.1 KiB", - events: "5", - rawEstimate: "~500 raw-token eq.", - modified: "2026-05-24 21:57", - path: "~/.claude/projects/-Users-arach-dev-contextual/ada6d81e...jsonl", - }, -]; +function displaySnippet(hit: KnowledgeHit, query: string): string { + const compact = hit.snippet.replace(/\s+/g, " ").trim(); + const marker = /\s-\s\[\d{3,}\]\s`[^`]+`(?:\s\([^)]*\))?\s-\s/u.exec(compact); + if (!marker) return compact; -const WEEK_PREP_ROWS = [ - { - step: "Inventory", - input: "266 files / 489 MiB", - output: "session manifest", - timing: "1-5s", - }, - { - step: "Mechanical extraction", - input: "raw JSONL", - output: "25-100 MiB markdown", - timing: "30-120s", - }, - { - step: "FTS/fuzzy index", - input: "derived markdown", - output: "75-300 MiB SQLite", - timing: "30-180s", - }, - { - step: "First useful search", - input: "local index", - output: "ranked hits + source refs", - timing: "<100ms/query", - }, - { - step: "LLM enrichment", - input: "selected chunks", - output: "decisions, files, problems", - timing: "10-60m async", - }, -]; + const before = compact.slice(0, marker.index).trim(); + const after = compact.slice(marker.index + marker[0].length).trim(); + const terms = queryTerms(query).map((term) => term.toLowerCase()); + const beforeHasQuery = terms.some((term) => before.toLowerCase().includes(term)); -function classPart(value: string): string { - return value.replace(/[^a-z0-9]+/gi, "-").replace(/^-|-$/g, "").toLowerCase(); + if (before && (beforeHasQuery || after.startsWith("{") || after.startsWith("["))) { + return before; + } + return after || before || compact; } -export function KnowledgeSearchScreen({ 
navigate }: { navigate: (route: Route) => void }) { - const [modeId, setModeId] = useState("standard"); - const [scenarioId, setScenarioId] = useState(SEARCH_SCENARIOS[0]!.id); - - const selectedMode = useMemo( - () => EXTRACTION_MODES.find((mode) => mode.id === modeId) ?? EXTRACTION_MODES[0]!, - [modeId], - ); - const selectedScenario = useMemo( - () => SEARCH_SCENARIOS.find((scenario) => scenario.id === scenarioId) ?? SEARCH_SCENARIOS[0]!, - [scenarioId], - ); +export function KnowledgeSearchScreen({ navigate: _navigate }: { navigate: (route: Route) => void }) { + const { selectedKnowledgeHit, inspectKnowledgeHit, clearKnowledgeHit } = useScout(); + const { status, setStatus, error, setError } = useKnowledgeStatus(); + const [query, setQuery] = useState("QMD search"); + const [hits, setHits] = useState([]); + const [searching, setSearching] = useState(false); + const [indexing, setIndexing] = useState(false); + + const hasIndex = (status?.chunks ?? 0) > 0; + + const runSearch = async (nextQuery = query) => { + const trimmed = nextQuery.trim(); + if (!trimmed || !hasIndex) { + setHits([]); + return; + } + setSearching(true); + try { + setError(null); + const params = new URLSearchParams({ q: trimmed, limit: "30" }); + const response = await api(`/api/knowledge/search?${params.toString()}`); + setHits(response.hits); + setStatus(response.status); + if (response.hits[0]) { + inspectKnowledgeHit(response.hits[0], trimmed); + } else { + clearKnowledgeHit(); + } + } catch (err) { + setError(err instanceof Error ? 
err.message : String(err)); + } finally { + setSearching(false); + } + }; + + useEffect(() => { + if (!hasIndex) return; + const timer = window.setTimeout(() => { + void runSearch(query); + }, 250); + return () => window.clearTimeout(timer); + }, [query, hasIndex]); + + const buildIndex = async (force = false) => { + setIndexing(true); + try { + setError(null); + const response = await api<{ status: KnowledgeStatus }>("/api/knowledge/sessions/index", { + method: "POST", + body: JSON.stringify({ days: 3, limit: 260, force }), + }); + setStatus(response.status); + await runSearch(query); + } catch (err) { + setError(err instanceof Error ? err.message : String(err)); + } finally { + setIndexing(false); + } + }; + + const onSubmit = (event: FormEvent) => { + event.preventDefault(); + void runSearch(query); + }; return (
-
-
-
-
-

QMD-style extraction, fuzzy search, then raw-log drilldown.

-

- A Scout-native view of the workflow: choose sessions, derive a markdown knowledge - corpus, search that corpus quickly, and only open transcript-level evidence when the - conversation needs it. -

-
-
- - -
-
- -
- {METRICS.map((metric) => ( -
- {metric.label} - {metric.value} - {metric.detail} -
- ))} -
- -
-
- -
-
-
- One-week run -

Local log footprint for a heavy week

-
-
-
- {WEEKLY_SCOPE_ROWS.map((row) => ( -
- {row.label} - {row.value} - {row.detail} -
- ))} -
-
- -
-
-
- Representative sample -

Large, normal, and small sessions from Codex and Claude

-
-
-
-
- Harness - Tier - Size - Events - Rough size - Modified - Path -
- {SAMPLE_SESSION_ROWS.map((row) => ( -
- {row.harness} - {row.tier} - {row.size} - {row.events} - {row.rawEstimate} - {row.modified} - {row.path} -
- ))} -
-
- -
- - +
-
-
- Conversation layer -

Ask the derived corpus first

-
-
- -
- {SEARCH_SCENARIOS.map((scenario) => ( +
+
- - -
+ )} -
-
-
- Back-of-envelope budget -

What it takes to index a week

-
-
-
- {WEEK_PREP_ROWS.map((row) => ( -
- {row.step} -
-
-
Input
-
{row.input}
-
-
-
Output
-
{row.output}
-
-
-
Timing
-
{row.timing}
-
-
-
- ))} -
-
- -
-
-
-
- Derived files -

QMD-ready corpus

-
-
-
-
- Document - Kind - Weight - Refs -
- {DOC_ROWS.map((doc) => ( -
- {doc.name} - {doc.kind} - {doc.weight} - {doc.refs} + {hasIndex && ( +
+
+ {searching ? "Searching derived QMD chunks..." : `${hits.length} matching chunks`} + {status ? `${formatCount(status.chunks)} indexed` : "index"}
- ))} -
-
- -
-
-
- Evidence -

Raw transcript drilldown

-
-
-
-
- Derived corpus - {SIDE_CAR_ROOT} -
-
- Source transcript - {SESSION_PATH} -
-
- Drilldown command - qmd get qmd://scout-session/events-07.md:128 -l 38 --format md + {hits.length === 0 && !searching ? ( +
+ No hits for this query. Try a project name, file path, tool name, or concept from recent work. +
+ ) : hits.map((hit) => { + const transcript = firstTranscriptRef(hit); + const project = facetText(hit, "project"); + const harness = facetText(hit, "harness"); + const selected = selectedKnowledgeHit?.id === hit.id; + const sourcePath = transcript ? pathLabel(transcript.path) : ""; + const resultSnippet = displaySnippet(hit, query); + return ( + + ); + })}
-
+ )}
diff --git a/packages/web/client/screens/knowledge-search.css b/packages/web/client/screens/knowledge-search.css index 54135b38..b3cb3338 100644 --- a/packages/web/client/screens/knowledge-search.css +++ b/packages/web/client/screens/knowledge-search.css @@ -7,8 +7,8 @@ container-type: inline-size; display: flex; flex-direction: column; - gap: var(--space-xl); - padding: var(--space-4xl) var(--space-4xl) var(--space-5xl); + gap: var(--space-lg); + padding: var(--space-xl) var(--space-4xl) var(--space-5xl); background: linear-gradient(180deg, color-mix(in srgb, var(--surface) 28%, transparent), transparent 220px), var(--bg); @@ -402,14 +402,30 @@ } .ks-hit { + appearance: none; min-width: 0; display: flex; flex-direction: column; gap: var(--space-xs); + width: 100%; padding: var(--space-md); border: 1px solid color-mix(in srgb, var(--ink) 7%, transparent); border-radius: var(--radius-lg); background: color-mix(in srgb, var(--bg) 28%, transparent); + color: inherit; + text-align: left; + cursor: pointer; +} + +.ks-hit:hover, +.ks-hit--selected { + border-color: color-mix(in srgb, var(--accent) 42%, var(--border)); + background: color-mix(in srgb, var(--accent) 7%, var(--surface)); +} + +.ks-hit:focus-visible { + outline: 2px solid color-mix(in srgb, var(--accent) 70%, transparent); + outline-offset: 2px; } .ks-hit-title { @@ -426,8 +442,12 @@ } .ks-hit-title strong { + min-width: 0; + flex: 1 1 auto; color: var(--ink); font-size: var(--text-md); + line-height: var(--leading-tight); + overflow-wrap: anywhere; } .ks-hit-title span, @@ -438,6 +458,14 @@ font-style: normal; } +.ks-hit-title em { + flex: none; + padding: 3px var(--space-sm); + border-radius: var(--radius-pill); + background: color-mix(in srgb, var(--accent) 8%, transparent); + color: color-mix(in srgb, var(--accent) 82%, var(--ink)); +} + .ks-hit p { margin: 0; color: var(--muted); @@ -445,7 +473,30 @@ line-height: var(--leading-tight); } -.ks-hit code, +.ks-hit-reason { + display: flex; + align-items: 
center; + min-width: 0; + color: color-mix(in srgb, var(--accent) 84%, var(--ink)); + font-size: var(--text-sm); + line-height: var(--leading-tight); +} + +.ks-hit-reason span { + min-width: 0; + padding-left: var(--space-sm); + border-left: 2px solid color-mix(in srgb, var(--accent) 42%, transparent); + overflow-wrap: anywhere; +} + +.ks-hit mark, +.ks-inspector mark { + padding: 0 2px; + border-radius: 3px; + background: color-mix(in srgb, var(--accent) 28%, transparent); + color: color-mix(in srgb, var(--accent) 94%, var(--ink)); +} + .ks-drill code { min-width: 0; color: var(--green); @@ -455,6 +506,20 @@ overflow-wrap: anywhere; } +.ks-hit-source { + min-width: 0; + width: fit-content; + max-width: 100%; + padding: 3px var(--space-sm); + border-radius: var(--radius-md); + background: color-mix(in srgb, var(--ink) 4%, transparent); + color: var(--dim); + font-family: var(--font-mono); + font-size: var(--text-xs); + line-height: var(--leading-tight); + overflow-wrap: anywhere; +} + .ks-week-grid { display: grid; grid-template-columns: repeat(5, minmax(120px, 1fr)); @@ -804,8 +869,686 @@ font-size: var(--text-xs); } +.ks-live-grid { + display: grid; + grid-template-columns: minmax(0, 1fr); + gap: var(--space-lg); + align-items: start; +} + +.ks-error { + min-width: 0; + padding: var(--space-md) var(--space-lg); + border: 1px solid color-mix(in srgb, var(--red) 25%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--red) 8%, var(--surface)); + color: color-mix(in srgb, var(--red) 85%, var(--ink)); + font-size: var(--text-md); + line-height: var(--leading-tight); + overflow-wrap: anywhere; +} + +.ks-search-form { + display: grid; + grid-template-columns: 20px minmax(0, 1fr) auto; + align-items: center; + gap: var(--space-sm); + min-width: 0; + padding: var(--space-sm); + border: 1px solid color-mix(in srgb, var(--ink) 8%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--bg) 45%, 
var(--surface)); +} + +.ks-search-form > svg { + color: var(--muted); + margin-left: var(--space-sm); +} + +.ks-search-form input { + min-width: 0; + height: 36px; + border: 0; + outline: none; + background: transparent; + color: var(--ink); + font: inherit; + font-size: var(--text-lg); +} + +.ks-search-form input::placeholder { + color: var(--dim); +} + +.ks-search-form button, +.ks-primary-button { + appearance: none; + height: 34px; + display: inline-flex; + align-items: center; + justify-content: center; + gap: var(--space-sm); + padding: 0 var(--space-lg); + border: 1px solid color-mix(in srgb, var(--accent) 38%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--accent) 16%, var(--surface)); + color: color-mix(in srgb, var(--accent) 90%, var(--ink)); + font-size: var(--text-md); + font-weight: 680; + line-height: var(--leading-none); + cursor: pointer; +} + +.ks-search-form button:disabled, +.ks-primary-button:disabled, +.ks-icon-button:disabled { + cursor: not-allowed; + opacity: 0.55; +} + +.ks-empty-state, +.ks-empty-hit { + min-width: 0; + display: flex; + flex-direction: column; + align-items: flex-start; + gap: var(--space-sm); + padding: var(--space-xl); + border: 1px dashed color-mix(in srgb, var(--ink) 12%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--surface) 68%, transparent); + color: var(--muted); +} + +.ks-empty-state strong { + color: var(--ink); + font-size: var(--text-lg); +} + +.ks-empty-state span, +.ks-empty-hit { + font-size: var(--text-md); + line-height: var(--leading-snug); +} + +.ks-hit-meta { + display: flex; + align-items: center; + flex-wrap: wrap; + gap: var(--space-xs); + min-width: 0; +} + +.ks-hit-meta span { + display: inline-flex; + min-width: 0; + max-width: 100%; + padding: 3px var(--space-sm); + border-radius: var(--radius-pill); + background: color-mix(in srgb, var(--ink) 5%, transparent); + color: var(--muted); + font-size: var(--text-xs); + 
line-height: var(--leading-none); + overflow-wrap: anywhere; +} + +.ks-index-actions { + display: flex; + align-items: center; + flex-wrap: wrap; + gap: var(--space-sm); +} + +.ks-pipeline-compact { + display: grid; + grid-template-columns: 1fr; + gap: var(--space-sm); +} + +.ks-pipeline-compact > div { + display: flex; + align-items: center; + gap: var(--space-sm); + min-width: 0; + padding: var(--space-sm) 0; + border-top: 1px solid color-mix(in srgb, var(--ink) 6%, transparent); + color: var(--muted); + font-size: var(--text-md); +} + +.ks-pipeline-compact > div:first-child { + border-top: 0; +} + +.ks-pipeline-compact svg { + flex: none; + color: var(--accent); +} + +.ks-job { + display: flex; + flex-direction: column; + gap: var(--space-3xs); + padding: var(--space-md); + border: 1px solid color-mix(in srgb, var(--accent) 18%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--accent) 7%, transparent); +} + +.ks-job strong { + color: var(--ink); +} + +.ks-job span { + color: var(--muted); + font-size: var(--text-sm); +} + +.ks-paths { + display: flex; + flex-direction: column; + gap: var(--space-md); +} + +.ks-paths > div { + display: flex; + flex-direction: column; + gap: var(--space-3xs); + min-width: 0; +} + +.ks-paths span { + color: var(--dim); + font-family: var(--font-mono); + font-size: var(--text-xs); +} + +.ks-paths code { + overflow-wrap: anywhere; + color: var(--muted); + font-size: var(--text-xs); +} + +.ks-inspector { + height: 100%; + min-height: 0; + display: flex; + flex-direction: column; + gap: var(--space-md); + padding: var(--space-md); + overflow: hidden; + color: var(--ink); +} + +.ks-inspector-tabs { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: var(--space-2xs); + padding: var(--space-2xs); + border: 1px solid color-mix(in srgb, var(--ink) 7%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--bg) 38%, transparent); +} + 
+.ks-inspector-tab { + appearance: none; + min-width: 0; + height: 30px; + display: inline-flex; + align-items: center; + justify-content: center; + gap: var(--space-sm); + padding: 0 var(--space-sm); + border: 0; + border-radius: var(--radius-md); + background: transparent; + color: var(--muted); + font-size: var(--text-sm); + font-weight: 660; + cursor: pointer; +} + +.ks-inspector-tab.active, +.ks-inspector-tab:hover { + color: var(--ink); + background: color-mix(in srgb, var(--accent) 11%, transparent); +} + +.ks-inspector-error { + padding: var(--space-sm) var(--space-md); + border: 1px solid color-mix(in srgb, var(--red) 25%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--red) 8%, transparent); + color: color-mix(in srgb, var(--red) 85%, var(--ink)); + font-size: var(--text-sm); + line-height: var(--leading-tight); + overflow-wrap: anywhere; +} + +.ks-preview-panel, +.ks-indexer-panel { + min-height: 0; + display: flex; + flex: 1 1 auto; + flex-direction: column; + gap: var(--space-md); + overflow: auto; +} + +.ks-preview-head { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: var(--space-md); + min-width: 0; +} + +.ks-preview-head h2 { + margin: var(--space-3xs) 0 0; + color: var(--ink); + font-family: var(--font-accent-title); + font-size: var(--text-lg); + font-weight: 650; + line-height: var(--leading-tight); + overflow-wrap: anywhere; +} + +.ks-preview-head button { + appearance: none; + flex: none; + width: 28px; + height: 28px; + display: inline-flex; + align-items: center; + justify-content: center; + border: 1px solid color-mix(in srgb, var(--ink) 8%, transparent); + border-radius: var(--radius-md); + background: color-mix(in srgb, var(--surface) 85%, transparent); + color: var(--muted); + cursor: pointer; +} + +.ks-preview-head button:hover { + color: var(--ink); + border-color: color-mix(in srgb, var(--accent) 32%, var(--border)); +} + +.ks-inspector-empty { + min-width: 
0; + display: flex; + flex-direction: column; + align-items: flex-start; + gap: var(--space-sm); + padding: var(--space-lg); + border: 1px dashed color-mix(in srgb, var(--ink) 12%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--surface) 58%, transparent); + color: var(--muted); + font-size: var(--text-md); + line-height: var(--leading-snug); +} + +.ks-inspector-empty strong { + color: var(--ink); + font-size: var(--text-lg); +} + +.ks-preview-meta { + display: flex; + flex-wrap: wrap; + gap: var(--space-xs); +} + +.ks-preview-meta span { + padding: 3px var(--space-sm); + border-radius: var(--radius-pill); + background: color-mix(in srgb, var(--ink) 5%, transparent); + color: var(--muted); + font-size: var(--text-xs); + line-height: var(--leading-none); +} + +.ks-preview-snippet { + margin: 0; + padding: var(--space-md); + border: 1px solid color-mix(in srgb, var(--ink) 7%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--bg) 34%, transparent); + color: var(--muted); + font-size: var(--text-sm); + line-height: var(--leading-snug); +} + +.ks-rank-explainer { + min-width: 0; + display: flex; + flex-direction: column; + gap: var(--space-sm); + padding: var(--space-md); + border: 1px solid color-mix(in srgb, var(--ink) 7%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--bg) 30%, transparent); +} + +.ks-score-panel { + display: grid; + grid-template-columns: repeat(3, minmax(0, 1fr)); + gap: var(--space-sm); +} + +.ks-score-panel > div { + min-width: 0; + display: flex; + flex-direction: column; + gap: var(--space-3xs); + padding: var(--space-sm) var(--space-md); + border: 1px solid color-mix(in srgb, var(--ink) 7%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--surface) 72%, transparent); +} + +.ks-score-panel span, +.ks-indexed-snippet span { + color: var(--dim); + font-family: var(--font-mono); + font-size: 
var(--text-xs); +} + +.ks-score-panel strong { + min-width: 0; + color: var(--ink); + font-size: var(--text-sm); + line-height: var(--leading-tight); + overflow-wrap: anywhere; +} + +.ks-score-panel em { + color: var(--muted); + font-size: var(--text-xs); + font-style: normal; + line-height: var(--leading-tight); +} + +.ks-rendered-matches, +.ks-indexed-snippet { + min-width: 0; + display: flex; + flex-direction: column; + gap: var(--space-sm); + padding: var(--space-md); + border: 1px solid color-mix(in srgb, var(--accent) 16%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--accent) 5%, transparent); +} + +.ks-rendered-head { + display: flex; + align-items: center; + gap: var(--space-sm); + color: var(--muted); +} + +.ks-rendered-head strong { + color: var(--ink); + font-size: var(--text-sm); +} + +.ks-rendered-head span { + margin-left: auto; + color: var(--dim); + font-family: var(--font-mono); + font-size: var(--text-xs); +} + +.ks-rendered-record { + min-width: 0; + display: flex; + flex-direction: column; + gap: var(--space-xs); + padding: var(--space-md); + border: 1px solid color-mix(in srgb, var(--ink) 7%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--surface) 78%, transparent); +} + +.ks-rendered-record header { + display: flex; + align-items: center; + gap: var(--space-sm); + min-width: 0; +} + +.ks-rendered-record header span, +.ks-rendered-record header em { + color: var(--dim); + font-family: var(--font-mono); + font-size: var(--text-xs); + font-style: normal; +} + +.ks-rendered-record header strong { + min-width: 0; + color: var(--ink); + font-family: var(--font-mono); + font-size: var(--text-xs); + overflow-wrap: anywhere; +} + +.ks-rendered-record p, +.ks-indexed-snippet p { + margin: 0; + color: var(--ink); + font-size: var(--text-sm); + line-height: var(--leading-snug); + overflow-wrap: anywhere; + display: -webkit-box; + -webkit-line-clamp: 5; + -webkit-box-orient: 
vertical; + overflow: hidden; +} + +.ks-indexed-snippet { + border-color: color-mix(in srgb, var(--ink) 7%, transparent); + background: color-mix(in srgb, var(--bg) 34%, transparent); +} + +.ks-preview-source { + min-width: 0; + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: var(--space-xs) var(--space-sm); + align-items: center; + padding: var(--space-md); + border: 1px solid color-mix(in srgb, var(--green) 16%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--green) 6%, transparent); +} + +.ks-preview-source span { + grid-column: 1 / -1; + color: var(--dim); + font-family: var(--font-mono); + font-size: var(--text-xs); +} + +.ks-preview-source code { + min-width: 0; + color: var(--green); + font-family: var(--font-mono); + font-size: var(--text-xs); + overflow-wrap: anywhere; +} + +.ks-preview-source button { + appearance: none; + height: 28px; + display: inline-flex; + align-items: center; + gap: var(--space-xs); + padding: 0 var(--space-sm); + border: 1px solid color-mix(in srgb, var(--green) 24%, transparent); + border-radius: var(--radius-md); + background: color-mix(in srgb, var(--green) 8%, var(--surface)); + color: color-mix(in srgb, var(--green) 88%, var(--ink)); + font-size: var(--text-xs); + font-weight: 650; + cursor: pointer; +} + +.ks-preview-loading { + display: flex; + align-items: center; + gap: var(--space-sm); + color: var(--muted); + font-size: var(--text-sm); +} + +.ks-jsonl-window { + min-width: 0; + display: flex; + flex-direction: column; + overflow: hidden; + border: 1px solid color-mix(in srgb, var(--ink) 7%, transparent); + border-radius: var(--radius-lg); +} + +.ks-jsonl-window-head { + display: flex; + align-items: center; + justify-content: space-between; + gap: var(--space-md); + padding: var(--space-sm) var(--space-md); + border-bottom: 1px solid color-mix(in srgb, var(--ink) 7%, transparent); + background: color-mix(in srgb, var(--bg) 36%, transparent); +} + 
+.ks-jsonl-window-head strong { + color: var(--ink); + font-size: var(--text-sm); +} + +.ks-jsonl-window-head span { + color: var(--dim); + font-family: var(--font-mono); + font-size: var(--text-xs); + text-align: right; +} + +.ks-jsonl-record { + border-top: 1px solid color-mix(in srgb, var(--ink) 6%, transparent); + background: color-mix(in srgb, var(--surface) 75%, transparent); +} + +.ks-jsonl-record--matched { + background: color-mix(in srgb, var(--accent) 6%, var(--surface)); +} + +.ks-jsonl-record:first-of-type { + border-top: 0; +} + +.ks-jsonl-record summary { + display: grid; + grid-template-columns: 42px minmax(72px, 0.42fr) minmax(0, 1fr); + gap: var(--space-sm); + align-items: center; + padding: var(--space-sm) var(--space-md); + color: var(--muted); + font-size: var(--text-xs); + cursor: pointer; +} + +.ks-jsonl-record summary span, +.ks-jsonl-record summary strong { + font-family: var(--font-mono); +} + +.ks-jsonl-record summary strong { + color: var(--ink); + overflow-wrap: anywhere; +} + +.ks-jsonl-record summary em { + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + font-style: normal; +} + +.ks-jsonl-record pre { + max-height: 240px; + margin: 0; + overflow: auto; + padding: var(--space-md); + border-top: 1px solid color-mix(in srgb, var(--ink) 6%, transparent); + background: color-mix(in srgb, black 20%, transparent); + color: var(--ink); + font-family: var(--font-mono); + font-size: var(--text-xs); + line-height: var(--leading-snug); + white-space: pre-wrap; + overflow-wrap: anywhere; +} + +.ks-indexer-actions { + display: flex; + flex-wrap: wrap; + gap: var(--space-sm); +} + +.ks-indexer-metrics { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: var(--space-sm); +} + +.ks-indexer-metrics > div, +.ks-indexer-facts > div { + min-width: 0; + display: flex; + flex-direction: column; + gap: var(--space-3xs); + padding: var(--space-sm) var(--space-md); + border: 1px solid color-mix(in 
srgb, var(--ink) 7%, transparent); + border-radius: var(--radius-lg); + background: color-mix(in srgb, var(--bg) 30%, transparent); +} + +.ks-indexer-metrics span, +.ks-indexer-facts span { + color: var(--dim); + font-family: var(--font-mono); + font-size: var(--text-xs); +} + +.ks-indexer-metrics strong, +.ks-indexer-facts strong { + min-width: 0; + color: var(--ink); + font-size: var(--text-sm); + line-height: var(--leading-tight); + overflow-wrap: anywhere; +} + +.ks-indexer-facts { + display: flex; + flex-direction: column; + gap: var(--space-sm); +} + +.ks-spin { + animation: ks-spin 0.9s linear infinite; +} + +@keyframes ks-spin { + from { transform: rotate(0deg); } + to { transform: rotate(360deg); } +} + @media (max-width: 1240px) { .ks-workbench, + .ks-live-grid, .ks-bottom-grid { grid-template-columns: 1fr; } @@ -828,6 +1571,7 @@ @container knowledge-search (max-width: 980px) { .ks-workbench, + .ks-live-grid, .ks-bottom-grid { grid-template-columns: 1fr; } @@ -870,11 +1614,13 @@ .ks-week-grid, .ks-prep-grid, .ks-query-tabs, + .ks-score-panel, .ks-mode-switch { grid-template-columns: 1fr; } .ks-boundary, + .ks-search-form, .ks-chat-row, .ks-drill > div, .ks-prep-card div, diff --git a/packages/web/server/core/broker/service.ts b/packages/web/server/core/broker/service.ts index 2e910330..0300c21a 100644 --- a/packages/web/server/core/broker/service.ts +++ b/packages/web/server/core/broker/service.ts @@ -708,6 +708,27 @@ export async function appendScoutUnblockRequestEvent( await brokerPostJson(broker.baseUrl, scoutBrokerPaths.v1.unblockRequestEvents, event); } +/// Advance an actor's read cursor in a conversation. With no `lastReadMessageId` +/// the broker marks read through the conversation's latest message (and stamps +/// `lastReadAt = now`), which is the "I just opened this thread" case. The broker +/// enforces monotonic progress, so this never rewinds a further-along cursor. 
+export async function recordScoutBrokerReadCursor( + input: { + conversationId: string; + actorId: string; + lastReadMessageId?: string | null; + lastReadAt?: number; + }, + baseUrl = resolveScoutBrokerUrl(), +): Promise<{ ok: boolean; acknowledgedDeliveries?: number }> { + const path = `${scoutBrokerPaths.v1.conversations}/${encodeURIComponent(input.conversationId)}/read-cursors`; + return brokerPostJson<{ ok: boolean; acknowledgedDeliveries?: number }>(baseUrl, path, { + actorId: input.actorId, + lastReadMessageId: input.lastReadMessageId ?? undefined, + lastReadAt: input.lastReadAt, + }); +} + export async function loadScoutBrokerContext( baseUrl = resolveScoutBrokerUrl(), options: { signal?: AbortSignal } = {}, diff --git a/packages/web/server/core/mobile/service.ts b/packages/web/server/core/mobile/service.ts index 520b33c1..d7b9f093 100644 --- a/packages/web/server/core/mobile/service.ts +++ b/packages/web/server/core/mobile/service.ts @@ -17,13 +17,15 @@ import { upScoutAgent } from "../agents/service.ts"; import { queryFleet } from "../../db-queries.ts"; import { loadScoutBrokerContext, - loadScoutActivityItems, + readScoutBrokerHome, openScoutPeerSession, + recordScoutBrokerReadCursor, registerScoutLocalAgentBinding, sendScoutConversationMessage, sendScoutDirectMessage, sendScoutMessage, - type ScoutActivityItem, + type ScoutBrokerConversationRecord, + type ScoutBrokerHomeActivityRecord, type ScoutBrokerSnapshot, type ScoutDirectMessageResult, } from "../broker/service.ts"; @@ -603,6 +605,40 @@ export async function getScoutFleet( return queryFleet(options); } +/** + * Resolve whatever id the phone routed with onto a real broker conversation. + * The phone may send a conversation id directly (`c.…` from the activity feed, or + * a `dm.…` direct id) or a bare agent id (from the Agents tab). 
Not every agent + * has an `operator` DM — many only have ask/consult conversations keyed `c.…` — + * so when there's no direct hit and no `dm.operator.{agentId}`, fall back to the + * most-recent conversation the agent actually participates in. + */ +function resolveMobileConversation( + snapshot: ScoutBrokerSnapshot, + rawId: string, +): ScoutBrokerConversationRecord | null { + const direct = snapshot.conversations[rawId]; + if (direct) return direct; + + const operatorDm = snapshot.conversations[`dm.operator.${rawId}`]; + if (operatorDm) return operatorDm; + + const participating = Object.values(snapshot.conversations).filter( + (conversation) => conversation.participantIds?.includes(rawId), + ); + if (participating.length === 0) return null; + + const lastActivityMs = (conversationId: string): number => + Object.values(snapshot.messages).reduce((latest, message) => { + if (message.conversationId !== conversationId) return latest; + return Math.max(latest, normalizeTimestampMs(message.createdAt) ?? 0); + }, 0); + + return participating + .slice() + .sort((a, b) => lastActivityMs(b.id) - lastActivityMs(a.id))[0] ?? null; +} + export async function getScoutMobileSessionSnapshot( conversationId: string, options: { @@ -614,7 +650,7 @@ export async function getScoutMobileSessionSnapshot( void currentDirectory; const broker = await requireMobileRelayContext(); const { snapshot } = broker; - const conversation = snapshot.conversations[conversationId]; + const conversation = resolveMobileConversation(snapshot, conversationId); if (!conversation) { throw new Error(`Unknown mobile session "${conversationId}".`); } @@ -624,7 +660,7 @@ export async function getScoutMobileSessionSnapshot( : null; const endpoint = directAgentId ? endpointForAgent(snapshot, directAgentId) : null; const agent = directAgentId ? 
/**
 * Load the activity rows shown on the mobile Home surface.
 *
 * Only `filters.limit` is honored (default 100); the `agentId`, `actorId` and
 * `conversationId` filters are not applied to the curated home feed.
 *
 * @param filters - Optional filters; see above for which ones take effect.
 * @returns At most `limit` home activity records, in the order the broker
 *   returned them. Empty when the broker home could not be read.
 */
export async function getScoutMobileActivity(
  filters: ScoutMobileActivityFilters = {},
): Promise<ScoutBrokerHomeActivityRecord[]> {
  // Home is an orientation surface, so it reads the broker's *curated* home
  // activity — one row per message, named actors, always thread-linked — not the
  // raw `/v1/activity` lifecycle firehose (ask_opened / flight_updated / …),
  // which is an ops feed and stays on the Tail tab. See project_home_purpose.
  const home = await readScoutBrokerHome();
  // Clamp at 0: a negative end index would make `slice` drop rows from the
  // tail instead of limiting the result.
  const limit = Math.max(0, filters.limit ?? 100);
  return (home?.activity ?? []).slice(0, limit);
}
/**
 * Mark a conversation read on behalf of the phone operator.
 *
 * Records a read cursor attributed to `MOBILE_OPERATOR_ID`. When the caller
 * gives no `lastReadMessageId`, the newest message of the conversation in the
 * current snapshot is used as the anchor.
 *
 * @param input - Conversation to mark read and, optionally, the message to
 *   read through.
 * @returns The conversation id with an unread count of 0.
 * @throws Error when the relay is unreachable or the conversation is unknown.
 */
export async function markScoutMobileConversationRead(input: {
  conversationId: string;
  lastReadMessageId?: string | null;
}): Promise<{ conversationId: string; unreadCount: number }> {
  const { conversationId } = input;

  const broker = await loadScoutBrokerContext();
  if (!broker) {
    throw new Error("Relay is not reachable.");
  }
  const known = broker.snapshot.conversations?.[conversationId];
  if (!known) {
    throw new Error(`Unknown conversation: ${conversationId}`);
  }

  // Anchor the cursor on a CONCRETE message id, not broker inference. If we let
  // the broker infer (no message id), it auto-fills `lastReadSeq = latestThreadSeq`
  // — a small integer — and `resolveReadCursor`'s monotonic guard ranks that
  // against existing cursors, which rank by message `createdAt` (a ~1e12 ms
  // timestamp). The small seq always loses, so the guard reverts the write and
  // `lastReadAt` never advances (the badge never clears). Passing an explicit
  // `lastReadMessageId` makes the broker rank by that message's createdAt, which
  // is newer than the prior cursor ⇒ it advances. See SCO-061 read-cursor flow.
  let lastReadMessageId = input.lastReadMessageId ?? undefined;
  if (!lastReadMessageId) {
    const newest = Object.values(broker.snapshot.messages ?? {})
      .filter((message) => message.conversationId === conversationId)
      .reduce<{ id: string; createdAt: number } | undefined>(
        // Strictly-newer wins, so the first message seen keeps a tie.
        (best, message) =>
          best && !(message.createdAt > best.createdAt)
            ? best
            : { id: message.id, createdAt: message.createdAt },
        undefined,
      );
    lastReadMessageId = newest?.id;
  }

  const cursor = {
    conversationId,
    actorId: MOBILE_OPERATOR_ID,
    lastReadMessageId,
  };
  await recordScoutBrokerReadCursor(cursor, broker.baseUrl);

  // Read through the latest message ⇒ caught up. The next list pull reconciles
  // if a new inbound message landed in the same instant.
  return { conversationId, unreadCount: 0 };
}
input.limit : null, @@ -1031,6 +1030,17 @@ const mobileRouter = t.router({ return sendScoutMobileComms(input, resolveMobileCurrentDirectory(), ctx.deviceId); }), + commsMarkRead: procedure + .input( + z.object({ + conversationId: z.string(), + lastReadMessageId: z.string().nullable().optional(), + }), + ) + .mutation(async ({ input }) => { + return markScoutMobileConversationRead(input); + }), + // -- Terminal (in-app SSH/PTY) ------------------------------------------ terminalProvision: procedure .input(z.object({ sshPublicKey: z.string() })) diff --git a/packages/web/server/core/pairing/runtime/bridge/server.ts b/packages/web/server/core/pairing/runtime/bridge/server.ts index 4340e581..a35b552b 100644 --- a/packages/web/server/core/pairing/runtime/bridge/server.ts +++ b/packages/web/server/core/pairing/runtime/bridge/server.ts @@ -35,6 +35,7 @@ import { getScoutMobileSessionSnapshot, getScoutMobileSessions, getScoutMobileWorkspaces, + markScoutMobileConversationRead, sendScoutMobileComms, sendScoutMobileMessage, } from "../../../mobile/service.ts"; @@ -760,6 +761,20 @@ async function handleRPCInner( }; } + case "mobile/comms/read": { + const p = req.params as { conversationId?: string; lastReadMessageId?: string | null }; + if (!p?.conversationId) { + return { id: req.id, error: { code: -32602, message: "conversationId is required" } }; + } + return { + id: req.id, + result: await markScoutMobileConversationRead({ + conversationId: p.conversationId, + lastReadMessageId: p.lastReadMessageId ?? 
/** A knowledge source reference narrowed to harness transcript files. */
type HarnessTranscriptSourceRef = Extract<KnowledgeSourceRef, { kind: "harness_transcript" }>;

/** One JSONL line of a transcript, summarized for the source preview. */
type JsonlPreviewRecord = {
  index: number;
  raw: string;
  type?: string;
  role?: string;
  kind?: string;
  summary: string;
  renderedText: string;
  parsed: boolean;
  matched?: boolean;
  matchCount?: number;
  matchTerms?: string[];
};

/** Keys probed, in order, when looking for displayable text inside a record. */
const PREVIEW_TEXT_KEYS = [
  "text",
  "message",
  "content",
  "input",
  "arguments",
  "args",
  "output",
  "result",
  "prompt",
  "command",
  "lastPrompt",
  "aiTitle",
  "summary",
] as const;

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}

/** Read `value[key]` as a trimmed, non-empty string; `undefined` otherwise. */
function stringField(value: unknown, key: string): string | undefined {
  if (!isRecord(value)) return undefined;
  const field = value[key];
  return typeof field === "string" && field.trim() ? field.trim() : undefined;
}

/** Collapse whitespace runs to single spaces and cap the result at `max` characters with a `...` suffix. */
function trimPreviewLine(value: string, max = 260): string {
  const flat = value.replace(/\s+/g, " ").trim();
  return flat.length <= max ? flat : `${flat.slice(0, Math.max(0, max - 3))}...`;
}

/**
 * Split a free-text query into highlight terms: tokens of letters, digits and
 * `_ . / -`, longer than one character, de-duplicated case-insensitively
 * (first spelling wins), capped at 12 terms.
 */
function previewQueryTerms(query: string | undefined): string[] {
  const seen = new Set<string>();
  return (query ?? "")
    .split(/[^A-Za-z0-9_./-]+/u)
    .map((term) => term.trim())
    .filter((term) => term.length > 1)
    .filter((term) => {
      const key = term.toLowerCase();
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    })
    .slice(0, 12);
}

/**
 * Count case-insensitive, non-overlapping occurrences of each term in `text`.
 *
 * @returns The total occurrence count and the terms that matched at least
 *   once, in input order.
 */
function matchStats(text: string, terms: string[]): { count: number; terms: string[] } {
  if (!text || terms.length === 0) return { count: 0, terms: [] };
  const lower = text.toLowerCase();
  let count = 0;
  const matchedTerms: string[] = [];
  for (const term of terms) {
    const needle = term.toLowerCase();
    // An empty needle "matches" at every offset without advancing the scan,
    // which would never terminate — treat it as no match.
    if (needle.length === 0) continue;
    let index = lower.indexOf(needle);
    if (index < 0) continue;
    matchedTerms.push(term);
    while (index >= 0) {
      count++;
      index = lower.indexOf(needle, index + needle.length);
    }
  }
  return { count, terms: matchedTerms };
}

/**
 * Pull displayable text out of an arbitrary JSON value: strings are returned
 * as-is, arrays are flattened and space-joined, and objects are probed key by
 * key (see `PREVIEW_TEXT_KEYS`), returning the first non-empty hit.
 */
function extractPreviewText(value: unknown): string | null {
  if (typeof value === "string") return value;
  if (Array.isArray(value)) {
    const joined = value
      .map((entry) => extractPreviewText(entry))
      .filter((entry): entry is string => Boolean(entry))
      .join(" ");
    return joined || null;
  }
  if (!isRecord(value)) return null;
  for (const key of PREVIEW_TEXT_KEYS) {
    const extracted = extractPreviewText(value[key]);
    if (extracted) return extracted;
  }
  return null;
}

/**
 * Summarize one raw JSONL line. Lines that fail to parse as JSON are still
 * returned, flagged `parsed: false` with kind `"unparseable"`.
 *
 * @param raw - The line exactly as read from the file.
 * @param index - Zero-based line index within the file.
 * @param terms - Highlight terms used for the match statistics.
 */
function summarizeJsonlRecord(raw: string, index: number, terms: string[]): JsonlPreviewRecord {
  try {
    const parsed = JSON.parse(raw) as unknown;
    const payload = isRecord(parsed) ? parsed.payload : null;
    const message = isRecord(parsed) ? parsed.message : null;
    // Prefer the nested envelope (`payload`, then `message`) over the top level.
    const candidate = payload ?? message ?? parsed;
    const type = stringField(parsed, "type") ?? stringField(candidate, "type");
    const role = stringField(parsed, "role") ?? stringField(candidate, "role") ?? stringField(message, "role");
    const kind = stringField(parsed, "kind") ?? stringField(candidate, "kind") ?? type ?? role;
    const renderedText = extractPreviewText(candidate) ?? extractPreviewText(parsed) ?? raw;
    const summary = trimPreviewLine(renderedText);
    // Counts are taken over summary + rendered text + raw line together, so a
    // single occurrence can be counted more than once.
    const stats = matchStats(`${summary}\n${renderedText}\n${raw}`, terms);
    return {
      index,
      raw,
      ...(type ? { type } : {}),
      ...(role ? { role } : {}),
      ...(kind ? { kind } : {}),
      summary,
      renderedText,
      parsed: true,
      matched: stats.count > 0,
      matchCount: stats.count,
      matchTerms: stats.terms,
    };
  } catch {
    const stats = matchStats(raw, terms);
    return {
      index,
      raw,
      kind: "unparseable",
      summary: trimPreviewLine(raw),
      renderedText: raw,
      parsed: false,
      matched: stats.count > 0,
      matchCount: stats.count,
      matchTerms: stats.terms,
    };
  }
}

/**
 * Lexically test whether `target` is `root` itself or lies beneath it.
 * Both arguments are expected to be already-resolved paths.
 */
function isInsideRoot(root: string, target: string): boolean {
  const rel = relative(root, target);
  if (rel === "") return true;
  if (isAbsolute(rel)) return false;
  // Only a leading `..` path SEGMENT escapes the root; a child whose name
  // merely starts with two dots (e.g. `..cache`) is still inside it.
  const escapes = rel === ".." || rel.startsWith("../") || rel.startsWith("..\\");
  return !escapes;
}

/**
 * Turn a portable transcript path into an absolute path, or `null` when the
 * path is empty, its root is unrecognized, or the result falls outside the
 * allowed roots (home directory, current directory, control home, and the
 * specific root named by the reference).
 *
 * NOTE(review): containment is checked lexically via `resolve`; symlinks are
 * not followed here — confirm whether a `realpath` check is wanted.
 */
function resolveKnowledgePreviewPath(
  sourceRef: HarnessTranscriptSourceRef,
  currentDirectory: string,
): string | null {
  const paths = resolveOpenScoutKnowledgePaths();
  const controlHome = dirname(paths.knowledgeRoot);
  const portable = sourceRef.path;
  const relPath = portable.relPath?.trim();
  if (!relPath) return null;

  if (portable.root === "ABSOLUTE") {
    const absolute = resolve(relPath);
    const trustedRoots = [homedir(), currentDirectory, controlHome].map((root) => resolve(root));
    return trustedRoots.some((root) => isInsideRoot(root, absolute)) ? absolute : null;
  }

  const root = portable.root === "HOME"
    ? homedir()
    : portable.root === "OPENSCOUT_CONTROL_HOME"
      ? controlHome
      : portable.root === "OPENSCOUT_SUPPORT_DIRECTORY"
        ? dirname(controlHome)
        : portable.root === "PROJECT_ROOT"
          ? currentDirectory
          : null;
  if (!root) return null;
  const resolved = resolve(root, relPath);
  return isInsideRoot(resolve(root), resolved) ? resolved : null;
}

/**
 * Read a window of records from a JSONL transcript for preview.
 *
 * The window is the reference's `recordRange` (default: 25 records from the
 * start) widened by `contextRecords` on each side (0–20, default 4) and capped
 * at `maxRecords` records (1–120, default 80). The file is streamed line by
 * line and reading stops as soon as the window is complete.
 *
 * @throws Error when the path is outside the trusted roots or is not a file;
 *   file-system errors from `statSync` propagate.
 */
async function readKnowledgeJsonlPreview(input: {
  sourceRef: HarnessTranscriptSourceRef;
  currentDirectory: string;
  contextRecords?: number;
  maxRecords?: number;
  query?: string;
}) {
  const resolvedPath = resolveKnowledgePreviewPath(input.sourceRef, input.currentDirectory);
  if (!resolvedPath) {
    throw new Error("source path is outside trusted preview roots");
  }
  const stats = statSync(resolvedPath);
  if (!stats.isFile()) {
    throw new Error("source path is not a file");
  }

  const requested = input.sourceRef.recordRange;
  const requestedStart = Array.isArray(requested) && Number.isFinite(requested[0])
    ? Math.max(0, Math.floor(requested[0]))
    : 0;
  const requestedEnd = Array.isArray(requested) && Number.isFinite(requested[1])
    ? Math.max(requestedStart, Math.floor(requested[1]))
    : requestedStart + 24;
  const contextRecords = Math.min(20, Math.max(0, Math.floor(input.contextRecords ?? 4)));
  const maxRecords = Math.min(120, Math.max(1, Math.floor(input.maxRecords ?? 80)));
  const start = Math.max(0, requestedStart - contextRecords);
  const desiredEnd = requestedEnd + contextRecords;
  const end = Math.min(desiredEnd, start + maxRecords - 1);
  const terms = previewQueryTerms(input.query);

  const records: JsonlPreviewRecord[] = [];
  let index = 0;
  let truncatedAfter = false;
  const stream = createReadStream(resolvedPath, { encoding: "utf8" });
  const reader = createInterface({ input: stream, crlfDelay: Infinity });

  try {
    for await (const line of reader) {
      if (index > end) {
        // At least one more line exists beyond the window.
        truncatedAfter = true;
        break;
      }
      if (index >= start) {
        records.push(summarizeJsonlRecord(line, index, terms));
      }
      index++;
    }
  } finally {
    // Closing the readline interface does not release the underlying file;
    // destroy the stream so an early stop does not leak its descriptor.
    reader.close();
    stream.destroy();
  }

  const first = records[0]?.index ?? start;
  const last = records[records.length - 1]?.index ?? first;
  return {
    path: resolvedPath,
    sourcePath: input.sourceRef.path,
    harness: input.sourceRef.harness,
    sessionId: input.sourceRef.sessionId,
    requestedRange: requested,
    previewRange: [first, last] as [number, number],
    records,
    recordsRead: records.length,
    truncatedBefore: start > 0,
    truncatedAfter,
    query: input.query,
    queryTerms: terms,
  };
}
30; + const store = new SQLiteKnowledgeStore(); + try { + return c.json({ + q, + hits: store.searchLexical({ + q, + sourceKinds: ["sessions"], + limit, + mode: "lexical", + }), + status: store.status(), + }); + } finally { + store.close(); + } + }); + + app.post("/api/knowledge/source-preview", async (c) => { + const body = (await c.req.json().catch(() => ({}))) as { + sourceRef?: unknown; + contextRecords?: unknown; + maxRecords?: unknown; + q?: unknown; + }; + const sourceRef = body.sourceRef; + if (!isRecord(sourceRef) || sourceRef.kind !== "harness_transcript") { + return c.json({ error: "sourceRef must be a harness transcript ref" }, 400); + } + try { + return c.json(await readKnowledgeJsonlPreview({ + sourceRef: sourceRef as HarnessTranscriptSourceRef, + currentDirectory, + contextRecords: typeof body.contextRecords === "number" ? body.contextRecords : undefined, + maxRecords: typeof body.maxRecords === "number" ? body.maxRecords : undefined, + query: typeof body.q === "string" ? body.q : undefined, + })); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + const status = message.includes("trusted preview roots") ? 403 : 500; + return c.json({ error: message }, status as 403 | 500); + } + }); + + app.post("/api/knowledge/sessions/index", async (c) => { + const body = (await c.req.json().catch(() => ({}))) as { + days?: unknown; + limit?: unknown; + force?: unknown; + }; + const days = typeof body.days === "number" && Number.isFinite(body.days) + ? body.days + : 3; + const limit = typeof body.limit === "number" && Number.isFinite(body.limit) + ? body.limit + : 220; + const force = body.force === true; + try { + const result = await indexRecentSessionKnowledge({ days, limit, force }); + const store = new SQLiteKnowledgeStore(); + try { + return c.json({ result, status: store.status() }); + } finally { + store.close(); + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + return c.json({ error: message }, 500); + } + }); + app.get("/api/ui/scenes", async (c) => { const settings = await readOpenScoutSettings({ currentDirectory }).catch(() => null); return c.json(settings?.ui ?? { scenes: [], activeSceneIdBySurface: {} });