Skip to content

Commit 6c7394e

Browse files
committed
feat: add workspace-level semantic search across v2 notebooks
1 parent cf9c5a2 commit 6c7394e

4 files changed

Lines changed: 169 additions & 1 deletion

File tree

openapi.yaml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,36 @@ paths:
705705
type: array
706706
items:
707707
$ref: '#/components/schemas/Notebook'
708+
709+
/api/v2/workspaces/{workspaceId}/search:
710+
post:
711+
summary: Workspace semantic search across notebooks
712+
operationId: workspaceSearchV2
713+
tags: [notebooks-v2]
714+
x-auth-scope: read
715+
parameters:
716+
- $ref: '#/components/parameters/WorkspaceId'
717+
requestBody:
718+
required: true
719+
content:
720+
application/json:
721+
schema:
722+
type: object
723+
required: [query]
724+
properties:
725+
query:
726+
type: string
727+
limit:
728+
type: integer
729+
minimum: 1
730+
maximum: 50
731+
threshold:
732+
type: number
733+
minimum: 0
734+
maximum: 1
735+
responses:
736+
'200':
737+
description: Workspace-scoped search results with notebook attribution
708738
post:
709739
summary: Create notebook
710740
operationId: createNotebookV2

src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { telemetry } from './routes/telemetry'
1414
import { chat } from './routes/chat'
1515
import { notebooksV2 } from './routes/notebooks-v2'
1616
import { notebooksV2Manage } from './routes/notebooks-v2-manage'
17+
import { workspacesV2 } from './routes/workspaces-v2'
1718
import { handleIngestion } from './queue'
1819

1920
const app = new Hono<{ Bindings: Env; Variables: AppVariables }>()
@@ -82,6 +83,7 @@ app.route('/api/conversations', conversations)
8283
// --- Notebook v2: workspace-scoped source intelligence ---
8384
app.use('/api/v2/workspaces/*', requireAuth('read'))
8485
app.use('/api/v2/workspaces/*', rateLimit({ maxRequests: 30, windowSeconds: 60 }))
86+
app.route('/api/v2/workspaces/:workspaceId', workspacesV2)
8587
app.route('/api/v2/workspaces/:workspaceId/notebooks', notebooksV2)
8688
app.route('/api/v2/workspaces/:workspaceId/notebooks', notebooksV2Manage)
8789

src/lib/vectorize.ts

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import type {
1919

2020
const TEXT_PREVIEW_LENGTH = 500
2121

22-
export class VectorStore {
22+
export class VectorStore {
2323
private vectorize: VectorizeIndex
2424
private kv: KVNamespace
2525

@@ -147,6 +147,72 @@ export class VectorStore {
147147
})
148148
)
149149
}
150+
151+
/**
152+
* Workspace-style semantic search across multiple notebook IDs.
153+
* Filters results in application layer using Vectorize metadata pointers.
154+
*/
155+
async searchByNotebookIds(
156+
queryVector: number[],
157+
notebookIds: string[],
158+
limit: number = 10,
159+
scoreThreshold: number = 0.3,
160+
options: { hydrateFullText?: boolean } = {}
161+
): Promise<Array<SearchResult & { notebook_id?: string }>> {
162+
const { hydrateFullText = true } = options
163+
if (notebookIds.length === 0) return []
164+
165+
const notebookIdSet = new Set(notebookIds)
166+
const results = await this.vectorize.query(queryVector, {
167+
topK: Math.max(limit * 10, 100),
168+
returnMetadata: 'all',
169+
})
170+
171+
if (!results.matches || results.matches.length === 0) return []
172+
173+
const filtered = results.matches
174+
.filter((m) => {
175+
const meta = m.metadata as unknown as ConversationMetadata
176+
return !!meta?.notebook_id && notebookIdSet.has(meta.notebook_id)
177+
})
178+
.filter((m) => (m.score ?? 0) >= scoreThreshold)
179+
.slice(0, limit)
180+
181+
if (!hydrateFullText) {
182+
return filtered.map((match) => {
183+
const meta = match.metadata as unknown as ConversationMetadata
184+
const convId = meta?.id ?? match.id
185+
return {
186+
id: convId,
187+
title: meta?.title ?? 'Untitled',
188+
text: meta?.text_preview ?? '',
189+
create_time: meta?.create_time ?? 0,
190+
score: match.score ?? 0,
191+
notebook_id: meta?.notebook_id,
192+
}
193+
})
194+
}
195+
196+
return Promise.all(
197+
filtered.map(async (match) => {
198+
const meta = match.metadata as unknown as ConversationMetadata
199+
const convId = meta?.id ?? match.id
200+
const fullRaw = await this.kv.get(`conv:${convId}`)
201+
const full: ConversationRecord | null = fullRaw
202+
? JSON.parse(fullRaw)
203+
: null
204+
205+
return {
206+
id: convId,
207+
title: full?.title ?? meta?.title ?? 'Untitled',
208+
text: full?.text ?? meta?.text_preview ?? '',
209+
create_time: full?.create_time ?? meta?.create_time ?? 0,
210+
score: match.score ?? 0,
211+
notebook_id: full?.notebook_id ?? meta?.notebook_id,
212+
}
213+
})
214+
)
215+
}
150216

151217
/**
152218
* Get a single conversation by ID from KV.

src/routes/workspaces-v2.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import { Hono } from 'hono'
2+
import type { Env } from '../lib/types'
3+
import { generateQueryEmbedding } from '../lib/embeddings'
4+
import { VectorStore } from '../lib/vectorize'
5+
6+
const workspacesV2 = new Hono<{ Bindings: Env }>()
7+
8+
function requireParam(value: string | undefined, name: string): string {
9+
if (!value) throw new Error(`Missing path param: ${name}`)
10+
return value
11+
}
12+
13+
workspacesV2.post('/search', async (c) => {
14+
const workspaceId = requireParam(c.req.param('workspaceId'), 'workspaceId')
15+
const body = await c.req.json<{
16+
query: string
17+
limit?: number
18+
threshold?: number
19+
}>()
20+
21+
if (!body.query?.trim()) {
22+
return c.json({ error: 'query is required' }, 400)
23+
}
24+
if (!c.env.DB) {
25+
return c.json({ error: 'D1 DB binding is required for workspace search' }, 501)
26+
}
27+
28+
const notebookRows = await c.env.DB
29+
.prepare(
30+
`SELECT id, title
31+
FROM notebooks
32+
WHERE workspace_id = ? AND deleted_at IS NULL`
33+
)
34+
.bind(workspaceId)
35+
.all<{ id: string; title: string }>()
36+
37+
const notebookIds = notebookRows.results.map((n) => n.id)
38+
const notebookTitleById = new Map(notebookRows.results.map((n) => [n.id, n.title]))
39+
40+
if (notebookIds.length === 0) {
41+
return c.json({ query: body.query, count: 0, results: [] })
42+
}
43+
44+
const queryVector = await generateQueryEmbedding(body.query, c.env)
45+
const store = new VectorStore(c.env)
46+
const limit = Math.min(Math.max(body.limit ?? 10, 1), 50)
47+
const threshold = body.threshold ?? 0
48+
49+
const hits = await store.searchByNotebookIds(
50+
queryVector,
51+
notebookIds,
52+
limit,
53+
threshold,
54+
{ hydrateFullText: true }
55+
)
56+
57+
return c.json({
58+
query: body.query,
59+
count: hits.length,
60+
results: hits.map((hit) => ({
61+
...hit,
62+
notebook_id: hit.notebook_id ?? null,
63+
notebook_title: hit.notebook_id
64+
? (notebookTitleById.get(hit.notebook_id) ?? null)
65+
: null,
66+
})),
67+
})
68+
})
69+
70+
export { workspacesV2 }

0 commit comments

Comments
 (0)