diff --git a/src/index.ts b/src/index.ts index 67636ff..b55d1fd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -56,6 +56,12 @@ async function initializeDatabase(env: Env): Promise { } catch (e) { console.error("Database initialization error (non-fatal):", e); } + for (const alter of [ + `ALTER TABLE entries ADD COLUMN recall_count INTEGER DEFAULT 0`, + `ALTER TABLE entries ADD COLUMN importance_score INTEGER DEFAULT 0`, + ]) { + try { await env.DB.exec(alter); } catch { /* column already exists — no-op */ } + } } // ─── Duplicate detection ────────────────────────────────────────────────────── @@ -130,7 +136,10 @@ export function getHalfLifeMs(tags: string[]): number { return 30 * 24 * 60 * 60 * 1000; // 30 days default } -export function rerankWithTimeDecay(matches: VectorizeMatch[]): VectorizeMatch[] { +export function rerankWithTimeDecay( + matches: VectorizeMatch[], + recallCounts: Map = new Map() +): VectorizeMatch[] { const now = Date.now(); return matches @@ -139,11 +148,15 @@ export function rerankWithTimeDecay(matches: VectorizeMatch[]): VectorizeMatch[] const createdAt = meta?.created_at ?? now; const tags: string[] = Array.isArray(meta?.tags) ? meta.tags : []; const ageMs = now - createdAt; + const parentId = (meta?.parentId ?? match.id) as string; + const rc = recallCounts.get(parentId) ?? 0; const halfLifeMs = getHalfLifeMs(tags); const recencyMultiplier = Math.exp(-ageMs / halfLifeMs); + // log(1+0)=0 would zero out unrecalled entries; (1 + log1p(rc)) gives baseline 1.0 + const frequencyMultiplier = 1 + Math.log1p(rc); - return { ...match, score: match.score * recencyMultiplier }; + return { ...match, score: match.score * recencyMultiplier * frequencyMultiplier }; }) .sort((a, b) => b.score - a.score); } @@ -196,6 +209,36 @@ export function parseTimePhrase(query: string, now: number): { after?: number; b return { cleanQuery: query }; } +// ─── AI importance scoring ──────────────────────────────────────────────────── + +async function scoreImportance(content: string, env: Env): Promise { + try { + const stream = await env.AI.run("@cf/meta/llama-4-scout-17b-16e-instruct" as any, { + messages: [{ role: "user", content: + `Rate the long-term importance of this memory 1-5. Reply with only a single digit.\n1=trivial 3=useful context 5=critical decision or goal\n\nMemory: ${content.slice(0, 500)}` + }], + stream: true, + }); + const reader = (stream as ReadableStream).getReader(); + const decoder = new TextDecoder(); + let text = ''; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + decoder.decode(value).split('\n').forEach(line => { + if (line.startsWith('data: ') && !line.includes('[DONE]')) { + try { const d = JSON.parse(line.slice(6)); if (d.response) text += d.response; } catch {} + } + }); + } + reader.releaseLock(); + const score = parseInt(text.trim(), 10); + return score >= 1 && score <= 5 ? score : 3; + } catch { + return 0; + } +} + // ─── Hashtag extraction ─────────────────────────────────────────────────────── export function extractHashtags(content: string): { cleanContent: string; hashtags: string[] } { @@ -356,6 +399,10 @@ function buildMcpServer(env: Env): McpServer { console.error("Vectorize insert failed (non-fatal):", e); } + scoreImportance(c, env) + .then(score => env.DB.prepare(`UPDATE entries SET importance_score = ? WHERE id = ?`).bind(score, id).run()) + .catch(e => console.error("Importance scoring failed (non-fatal):", e)); + if (dup.status === "flagged") { return { content: [{ @@ -462,7 +509,15 @@ function buildMcpServer(env: Env): McpServer { return { content: [{ type: "text", text: "Nothing found matching that query." }] }; } - const reranked = rerankWithTimeDecay(results.matches as VectorizeMatch[]); + // Fetch recall_count for all candidates to use in scoring + const candidateIds = [...new Set(results.matches.map(m => (m.metadata as any)?.parentId ?? m.id))] as string[]; + const rcPlaceholders = candidateIds.map(() => "?").join(", "); + const { results: rcRows } = await env.DB.prepare( + `SELECT id, recall_count FROM entries WHERE id IN (${rcPlaceholders})` + ).bind(...candidateIds).all() as { results: { id: string; recall_count: number }[] }; + const recallCounts = new Map(rcRows.map(r => [r.id, r.recall_count ?? 0])); + + const reranked = rerankWithTimeDecay(results.matches as VectorizeMatch[], recallCounts); const seen = new Set(); const deduped = reranked.filter((m) => { @@ -489,6 +544,13 @@ function buildMcpServer(env: Env): McpServer { const d1Map = new Map(d1Rows.map((r) => [r.id as string, r])); + // Increment recall_count for entries actually shown (fire-and-forget) + Promise.all( + [...d1Map.keys()].map(id => + env.DB.prepare(`UPDATE entries SET recall_count = recall_count + 1 WHERE id = ?`).bind(id).run() + ) + ).catch(e => console.error("recall_count update failed (non-fatal):", e)); + const text = deduped.map((m, i) => { const meta = m.metadata as Record; const parentId = (meta?.parentId ?? m.id) as string; @@ -633,6 +695,11 @@ export default { storeEntry(env, id, c, finalTags, s, now) .catch((e) => console.error("Async embed failed:", e)) ); + ctx.waitUntil( + scoreImportance(c, env) + .then(score => env.DB.prepare(`UPDATE entries SET importance_score = ? WHERE id = ?`).bind(score, id).run()) + .catch(e => console.error("Importance scoring failed (non-fatal):", e)) + ); if (dup.status === "flagged") { return json({ diff --git a/test/helpers/d1-mock.ts b/test/helpers/d1-mock.ts index a9b8583..ad29534 100644 --- a/test/helpers/d1-mock.ts +++ b/test/helpers/d1-mock.ts @@ -9,7 +9,7 @@ export class D1Mock { async run() { if (s.startsWith("INSERT INTO entries")) { const [id, content, tags, source, created_at, vector_ids] = args; - db.entries.push({ id, content, tags, source, created_at, vector_ids }); + db.entries.push({ id, content, tags, source, created_at, vector_ids, recall_count: 0, importance_score: 0 }); return { meta: { changes: 1 } }; } if (s.startsWith("UPDATE entries SET vector_ids")) { @@ -24,6 +24,18 @@ export class D1Mock { if (row) row.content = content; return { meta: { changes: row ? 1 : 0 } }; } + if (s.startsWith("UPDATE entries SET recall_count")) { + const [id] = args; + const row = db.entries.find((e: any) => e.id === id); + if (row) row.recall_count = (row.recall_count ?? 0) + 1; + return { meta: { changes: row ? 1 : 0 } }; + } + if (s.startsWith("UPDATE entries SET importance_score")) { + const [score, id] = args; + const row = db.entries.find((e: any) => e.id === id); + if (row) row.importance_score = score; + return { meta: { changes: row ? 1 : 0 } }; + } if (s.startsWith("DELETE FROM entries WHERE id")) { const [id] = args; const before = db.entries.length; @@ -43,6 +55,12 @@ export class D1Mock { return null; }, async all() { + if (s.includes("recall_count FROM entries")) { + const results = db.entries + .filter((e: any) => args.includes(e.id)) + .map((e: any) => ({ id: e.id, recall_count: e.recall_count ?? 0 })); + return { results }; + } if (s.includes("json_each(entries.tags)")) { const tags = new Set(); db.entries.forEach((e: any) => { diff --git a/test/helpers/make-env.ts b/test/helpers/make-env.ts index eb5acb3..4e6c482 100644 --- a/test/helpers/make-env.ts +++ b/test/helpers/make-env.ts @@ -21,7 +21,7 @@ export function makeAIMock(): Ai { return { data: [new Array(384).fill(0.1)] }; return new ReadableStream({ start(c) { - c.enqueue(new TextEncoder().encode('data: {"response":"ok"}\n\n')); + c.enqueue(new TextEncoder().encode('data: {"response":"3"}\n\n')); c.enqueue(new TextEncoder().encode("data: [DONE]\n\n")); c.close(); }, diff --git a/test/integration/capture.test.ts b/test/integration/capture.test.ts index 1dddb10..cbc686b 100644 --- a/test/integration/capture.test.ts +++ b/test/integration/capture.test.ts @@ -22,6 +22,16 @@ describe("POST /capture", () => { env = makeTestEnv(db); }); + it("stores importance_score after async AI scoring completes", async () => { + const { ctx, drain } = makeCtx(); + const res = await worker.fetch(req("POST", "/capture", { body: { content: "Decided to switch to TypeScript for all new projects" } }), env, ctx); + expect(res.status).toBe(200); + await drain(); + expect(db.entries).toHaveLength(1); + expect(db.entries[0].importance_score).toBeGreaterThanOrEqual(1); + expect(db.entries[0].importance_score).toBeLessThanOrEqual(5); + }); + it("returns 400 when content is missing", async () => { const { ctx } = makeCtx(); const res = await worker.fetch(req("POST", "/capture", { body: {} }), env, ctx); diff --git a/test/unit/rerank.test.ts b/test/unit/rerank.test.ts index ecd1a5b..c61d429 100644 --- a/test/unit/rerank.test.ts +++ b/test/unit/rerank.test.ts @@ -14,7 +14,7 @@ describe("rerankWithTimeDecay", () => { match("old", 0.9, NOW - 60 * MS_DAY), match("new", 0.9, NOW - 1 * MS_DAY), ]; - const result = rerankWithTimeDecay(matches); + const result = rerankWithTimeDecay(matches, new Map()); expect(result[0].id).toBe("new"); }); @@ -24,7 +24,7 @@ describe("rerankWithTimeDecay", () => { match("b", 0.9, NOW - 30 * MS_DAY), match("c", 0.7, NOW - 30 * MS_DAY), ]; - const result = rerankWithTimeDecay(matches); + const result = rerankWithTimeDecay(matches, new Map()); for (let i = 0; i < result.length - 1; i++) { expect(result[i].score).toBeGreaterThanOrEqual(result[i + 1].score); } @@ -32,7 +32,7 @@ describe("rerankWithTimeDecay", () => { it("produces no NaN scores", () => { const matches = [match("x", 0.5, 0), match("y", 0.5, NOW)]; - rerankWithTimeDecay(matches).forEach(m => { + rerankWithTimeDecay(matches, new Map()).forEach(m => { expect(Number.isNaN(m.score)).toBe(false); }); }); @@ -40,8 +40,29 @@ describe("rerankWithTimeDecay", () => { it("task tag decays faster than context tag at same age", () => { const taskMatch = match("task-entry", 1.0, NOW - 30 * MS_DAY, ["task"]); const contextMatch = match("ctx-entry", 1.0, NOW - 30 * MS_DAY, ["context"]); - const [t] = rerankWithTimeDecay([taskMatch]); - const [c] = rerankWithTimeDecay([contextMatch]); + const [t] = rerankWithTimeDecay([taskMatch], new Map()); + const [c] = rerankWithTimeDecay([contextMatch], new Map()); expect(c.score).toBeGreaterThan(t.score); }); + + it("entry with higher recall_count ranks above equal-scored entry with zero recalls", () => { + const fresh = match("fresh", 0.9, NOW - 1 * MS_DAY); + const recalled = match("recalled", 0.9, NOW - 1 * MS_DAY); + const counts = new Map([["recalled", 10]]); + const result = rerankWithTimeDecay([fresh, recalled], counts); + expect(result[0].id).toBe("recalled"); + }); + + it("entry with recall_count=0 still produces a positive score (baseline multiplier = 1.0)", () => { + const m = match("entry", 0.8, NOW - 5 * MS_DAY); + const [result] = rerankWithTimeDecay([m], new Map()); + expect(result.score).toBeGreaterThan(0); + }); + + it("omitting recallCounts parameter behaves identically to passing an empty Map", () => { + const matches = [match("a", 0.9, NOW - 10 * MS_DAY)]; + const withEmpty = rerankWithTimeDecay(matches, new Map()); + const withDefault = rerankWithTimeDecay(matches); + expect(withDefault[0].score).toBeCloseTo(withEmpty[0].score, 10); + }); }); diff --git a/vitest.config.ts b/vitest.config.ts index a9ac6a7..c8d70c4 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -8,7 +8,7 @@ export default defineConfig({ coverage: { provider: "v8", include: ["src/**/*.ts"], - reporter: ["text", "html", "json-summary"], + reporter: ["text", "html", "json-summary", "json"], reportsDirectory: "coverage", }, },