diff --git a/src/index.ts b/src/index.ts
index 68bdab6..67636ff 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -196,6 +196,29 @@ export function parseTimePhrase(query: string, now: number): { after?: number; b
   return { cleanQuery: query };
 }
 
+// ─── Hashtag extraction ───────────────────────────────────────────────────────
+
+// A hashtag is `#` followed by letters, combining marks, digits or `_`, and it
+// must start the text or directly follow whitespace. That boundary leaves URL
+// fragments (`https://x.dev/docs#intro`) and mid-word hashes (`issue#42`) in the
+// content, and the Unicode classes stop `#café` from being cut off at the `é`.
+const HASHTAG_PATTERN = /(?<!\S)#[\p{L}\p{M}\p{N}_]+/gu;
+
+/**
+ * Splits inline hashtags out of a piece of text.
+ *
+ * @param content Text that may contain `#tag` tokens.
+ * @returns `cleanContent`: the text with every hashtag removed, whitespace runs
+ *   collapsed to single spaces and both ends trimmed (empty when the text held
+ *   nothing but hashtags). `hashtags`: the tag names without the leading `#`,
+ *   lowercased, in order of appearance; repeated tags are not deduplicated.
+ */
+export function extractHashtags(content: string): { cleanContent: string; hashtags: string[] } {
+  const hashtags = (content.match(HASHTAG_PATTERN) ?? []).map(t => t.slice(1).toLowerCase());
+  const cleanContent = content.replace(HASHTAG_PATTERN, '').replace(/\s+/g, ' ').trim();
+  return { cleanContent, hashtags };
+}
+
 // ─── Store entry (full embed + chunk) ────────────────────────────────────────
 // Returns the list of vector IDs inserted so forget() can clean up exactly.
 
@@ -317,8 +340,10 @@ function buildMcpServer(env: Env): McpServer {
       source: z.string().optional().describe("Origin: phone, browser, voice, claude"),
     },
     async ({ content, tags, source }) => {
-      const c = content.trim();
-      const t = tags ?? [];
+      const raw = content.trim();
+      const { cleanContent, hashtags } = extractHashtags(raw);
+      const c = cleanContent || raw;
+      const t = [...new Set([...(tags ?? []).map(tag => tag.toLowerCase()), ...hashtags])];
       const s = source ?? "claude";
 
       const dup = await checkDuplicate(c, env);
@@ -593,8 +618,10 @@ export default {
       try { body = await request.json(); } catch { return json({ error: "Invalid JSON" }, 400); }
       if (!body.content?.trim()) return json({ error: "content is required" }, 400);
 
-      const c = body.content.trim();
-      const t = body.tags ?? [];
+      const raw = body.content.trim();
+      const { cleanContent, hashtags } = extractHashtags(raw);
+      const c = cleanContent || raw;
+      const t = [...new Set([...(body.tags ?? []).map(tag => tag.toLowerCase()), ...hashtags])];
       const s = body.source ?? "api";
 
       const dup = await checkDuplicate(c, env);
diff --git a/test/integration/capture.test.ts b/test/integration/capture.test.ts
index 1a7d3d9..1dddb10 100644
--- a/test/integration/capture.test.ts
+++ b/test/integration/capture.test.ts
@@ -63,6 +63,51 @@ describe("POST /capture", () => {
     expect(db.entries).toHaveLength(0);
   });
 
+  it("extracts hashtags from content and stores clean content with tags", async () => {
+    const { ctx, drain } = makeCtx();
+    const res = await worker.fetch(req("POST", "/capture", { body: { content: "went for a run #health #fitness" } }), env, ctx);
+    await drain();
+    expect(res.status).toBe(200);
+    const data = await res.json() as any;
+    expect(data.ok).toBe(true);
+    expect(db.entries).toHaveLength(1);
+    expect(db.entries[0].content).toBe("went for a run");
+    const tags = JSON.parse(db.entries[0].tags);
+    expect(tags).toContain("health");
+    expect(tags).toContain("fitness");
+  });
+
+  it("merges hashtag tags with explicit tags and deduplicates case-insensitively", async () => {
+    const { ctx, drain } = makeCtx();
+    const res = await worker.fetch(req("POST", "/capture", { body: { content: "note #health", tags: ["Health", "fitness"] } }), env, ctx);
+    await drain();
+    expect(res.status).toBe(200);
+    const tags: string[] = JSON.parse(db.entries[0].tags);
+    const healthCount = tags.filter(t => t === "health").length;
+    expect(healthCount).toBe(1);
+    expect(tags).toContain("fitness");
+  });
+
+  it("behaves identically when no hashtags are present (regression)", async () => {
+    const { ctx, drain } = makeCtx();
+    const res = await worker.fetch(req("POST", "/capture", { body: { content: "plain note", tags: ["work"] } }), env, ctx);
+    await drain();
+    expect(res.status).toBe(200);
+    expect(db.entries[0].content).toBe("plain note");
+    const tags = JSON.parse(db.entries[0].tags);
+    expect(tags).toEqual(["work"]);
+  });
+
+  it("falls back to original content when input is only hashtags", async () => {
+    const { ctx, drain } = makeCtx();
+    const res = await worker.fetch(req("POST", "/capture", { body: { content: "#task" } }), env, ctx);
+    await drain();
+    expect(res.status).toBe(200);
+    expect(db.entries[0].content).toBe("#task");
+    const tags = JSON.parse(db.entries[0].tags);
+    expect(tags).toContain("task");
+  });
+
   it("stores flagged duplicate (score 0.85–0.94) with duplicate-candidate tag", async () => {
     const vectorize = makeVectorizeMock({
       query: vi.fn().mockResolvedValue({
diff --git a/test/unit/extract-hashtags.test.ts b/test/unit/extract-hashtags.test.ts
new file mode 100644
index 0000000..f528c62
--- /dev/null
+++ b/test/unit/extract-hashtags.test.ts
@@ -0,0 +1,70 @@
+import { describe, it, expect } from "vitest";
+import { extractHashtags } from "../../src/index";
+
+describe("extractHashtags", () => {
+  it("returns empty hashtags and unchanged content when no hashtags present", () => {
+    const { cleanContent, hashtags } = extractHashtags("plain text");
+    expect(cleanContent).toBe("plain text");
+    expect(hashtags).toEqual([]);
+  });
+
+  it("extracts a single hashtag and strips it from content", () => {
+    const { cleanContent, hashtags } = extractHashtags("note #health");
+    expect(cleanContent).toBe("note");
+    expect(hashtags).toEqual(["health"]);
+  });
+
+  it("extracts multiple hashtags", () => {
+    const { cleanContent, hashtags } = extractHashtags("note #health #fitness");
+    expect(cleanContent).toBe("note");
+    expect(hashtags).toEqual(["health", "fitness"]);
+  });
+
+  it("extracts a hashtag mid-sentence and collapses whitespace", () => {
+    const { cleanContent, hashtags } = extractHashtags("went #health for a run");
+    expect(cleanContent).toBe("went for a run");
+    expect(hashtags).toEqual(["health"]);
+  });
+
+  it("lowercases hashtags", () => {
+    const { cleanContent, hashtags } = extractHashtags("note #Health #FITNESS");
+    expect(cleanContent).toBe("note");
+    expect(hashtags).toEqual(["health", "fitness"]);
+  });
+
+  it("returns empty cleanContent when content is only hashtags", () => {
+    const { cleanContent, hashtags } = extractHashtags("#task");
+    expect(cleanContent).toBe("");
+    expect(hashtags).toEqual(["task"]);
+  });
+
+  it("collapses extra whitespace left by removed hashtags", () => {
+    const { cleanContent, hashtags } = extractHashtags("a #b c");
+    expect(cleanContent).toBe("a c");
+    expect(hashtags).toEqual(["b"]);
+  });
+
+  it("handles hashtags with underscores and digits", () => {
+    const { cleanContent, hashtags } = extractHashtags("note #tag_1 #item2");
+    expect(cleanContent).toBe("note");
+    expect(hashtags).toEqual(["tag_1", "item2"]);
+  });
+
+  it("leaves URL fragments in the content", () => {
+    const { cleanContent, hashtags } = extractHashtags("see https://example.com/docs#intro #ref");
+    expect(cleanContent).toBe("see https://example.com/docs#intro");
+    expect(hashtags).toEqual(["ref"]);
+  });
+
+  it("ignores a hash in the middle of a word", () => {
+    const { cleanContent, hashtags } = extractHashtags("fixed issue#42 in C# #dev");
+    expect(cleanContent).toBe("fixed issue#42 in C#");
+    expect(hashtags).toEqual(["dev"]);
+  });
+
+  it("keeps non-ASCII letters inside a hashtag", () => {
+    const { cleanContent, hashtags } = extractHashtags("lunch #café");
+    expect(cleanContent).toBe("lunch");
+    expect(hashtags).toEqual(["café"]);
+  });
+});