Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@ export function parseTimePhrase(query: string, now: number): { after?: number; b
return { cleanQuery: query };
}

// ─── Hashtag extraction ───────────────────────────────────────────────────────

/**
 * Pulls inline `#hashtag` tokens out of free-form text.
 *
 * A hashtag is a `#` followed by one or more word characters (`[A-Za-z0-9_]`)
 * that sits at the start of a whitespace-delimited token. Requiring that
 * position keeps `#` characters inside URLs (`https://host/page#section`),
 * HTML entities (`&#39;`) and identifiers such as `C#7` from being mistaken
 * for tags and silently stripped out of the content.
 *
 * @param content - Raw text that may contain inline hashtags.
 * @returns `cleanContent` is the text with hashtags removed, whitespace runs
 *   collapsed to single spaces and the ends trimmed (an empty string when
 *   nothing but hashtags was supplied). `hashtags` holds the tag names without
 *   the leading `#`, lower-cased, in order of appearance; repeated tags are
 *   not de-duplicated here.
 */
export function extractHashtags(content: string): { cleanContent: string; hashtags: string[] } {
  // The lookbehind asserts "start of input or preceded by whitespace" without
  // consuming that character, so the text around a removed tag is untouched.
  const hashtagPattern = /(?<!\S)#\w+/g;
  const hashtags = (content.match(hashtagPattern) ?? []).map(t => t.slice(1).toLowerCase());
  const cleanContent = content.replace(hashtagPattern, '').replace(/\s+/g, ' ').trim();
  return { cleanContent, hashtags };
}

// ─── Store entry (full embed + chunk) ────────────────────────────────────────
// Returns the list of vector IDs inserted so forget() can clean up exactly.

Expand Down Expand Up @@ -317,8 +325,10 @@ function buildMcpServer(env: Env): McpServer {
source: z.string().optional().describe("Origin: phone, browser, voice, claude"),
},
async ({ content, tags, source }) => {
const c = content.trim();
const t = tags ?? [];
const raw = content.trim();
const { cleanContent, hashtags } = extractHashtags(raw);
const c = cleanContent || raw;
const t = [...new Set([...(tags ?? []).map(tag => tag.toLowerCase()), ...hashtags])];
const s = source ?? "claude";

const dup = await checkDuplicate(c, env);
Expand Down Expand Up @@ -593,8 +603,10 @@ export default {
try { body = await request.json(); } catch { return json({ error: "Invalid JSON" }, 400); }
if (!body.content?.trim()) return json({ error: "content is required" }, 400);

const c = body.content.trim();
const t = body.tags ?? [];
const raw = body.content.trim();
const { cleanContent, hashtags } = extractHashtags(raw);
const c = cleanContent || raw;
const t = [...new Set([...(body.tags ?? []).map(tag => tag.toLowerCase()), ...hashtags])];
const s = body.source ?? "api";

const dup = await checkDuplicate(c, env);
Expand Down
45 changes: 45 additions & 0 deletions test/integration/capture.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,51 @@ describe("POST /capture", () => {
expect(db.entries).toHaveLength(0);
});

it("extracts hashtags from content and stores clean content with tags", async () => {
  // Tags are supplied only inline; the request carries no explicit `tags` field.
  const { ctx, drain } = makeCtx();
  const request = req("POST", "/capture", { body: { content: "went for a run #health #fitness" } });
  const response = await worker.fetch(request, env, ctx);
  await drain();

  expect(response.status).toBe(200);
  const payload = await response.json() as any;
  expect(payload.ok).toBe(true);

  // Exactly one row is written, with the hashtags moved out of the text.
  expect(db.entries).toHaveLength(1);
  const stored = db.entries[0];
  expect(stored.content).toBe("went for a run");
  const storedTags = JSON.parse(stored.tags);
  expect(storedTags).toContain("health");
  expect(storedTags).toContain("fitness");
});

it("merges hashtag tags with explicit tags and deduplicates case-insensitively", async () => {
  // Inline "#health" and explicit "Health" differ only by case.
  const { ctx, drain } = makeCtx();
  const request = req("POST", "/capture", { body: { content: "note #health", tags: ["Health", "fitness"] } });
  const response = await worker.fetch(request, env, ctx);
  await drain();

  expect(response.status).toBe(200);
  const storedTags: string[] = JSON.parse(db.entries[0].tags);
  // The two spellings must collapse into a single lower-case "health" tag.
  expect(storedTags.filter(tag => tag === "health")).toHaveLength(1);
  expect(storedTags).toContain("fitness");
});

it("behaves identically when no hashtags are present (regression)", async () => {
  // Content without any '#' must be stored verbatim with only the explicit tags.
  const { ctx, drain } = makeCtx();
  const request = req("POST", "/capture", { body: { content: "plain note", tags: ["work"] } });
  const response = await worker.fetch(request, env, ctx);
  await drain();

  expect(response.status).toBe(200);
  const stored = db.entries[0];
  expect(stored.content).toBe("plain note");
  const storedTags = JSON.parse(stored.tags);
  expect(storedTags).toEqual(["work"]);
});

it("falls back to original content when input is only hashtags", async () => {
  // Stripping "#task" would leave nothing, so the raw text is expected to be kept.
  const { ctx, drain } = makeCtx();
  const request = req("POST", "/capture", { body: { content: "#task" } });
  const response = await worker.fetch(request, env, ctx);
  await drain();

  expect(response.status).toBe(200);
  const stored = db.entries[0];
  expect(stored.content).toBe("#task");
  // The tag is still recorded even though the content retains the hashtag.
  const storedTags = JSON.parse(stored.tags);
  expect(storedTags).toContain("task");
});

it("stores flagged duplicate (score 0.85–0.94) with duplicate-candidate tag", async () => {
const vectorize = makeVectorizeMock({
query: vi.fn().mockResolvedValue({
Expand Down
52 changes: 52 additions & 0 deletions test/unit/extract-hashtags.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { describe, it, expect } from "vitest";
import { extractHashtags } from "../../src/index";

// Unit coverage for extractHashtags: each case checks both halves of the
// returned pair, the cleaned text and the list of extracted tag names.
describe("extractHashtags", () => {
  it("returns empty hashtags and unchanged content when no hashtags present", () => {
    const result = extractHashtags("plain text");
    expect(result.cleanContent).toBe("plain text");
    expect(result.hashtags).toEqual([]);
  });

  it("extracts a single hashtag and strips it from content", () => {
    const result = extractHashtags("note #health");
    expect(result.cleanContent).toBe("note");
    expect(result.hashtags).toEqual(["health"]);
  });

  it("extracts multiple hashtags", () => {
    // Tags come back in the order they appear in the input.
    const result = extractHashtags("note #health #fitness");
    expect(result.cleanContent).toBe("note");
    expect(result.hashtags).toEqual(["health", "fitness"]);
  });

  it("extracts a hashtag mid-sentence and collapses whitespace", () => {
    const result = extractHashtags("went #health for a run");
    expect(result.cleanContent).toBe("went for a run");
    expect(result.hashtags).toEqual(["health"]);
  });

  it("lowercases hashtags", () => {
    const result = extractHashtags("note #Health #FITNESS");
    expect(result.cleanContent).toBe("note");
    expect(result.hashtags).toEqual(["health", "fitness"]);
  });

  it("returns empty cleanContent when content is only hashtags", () => {
    // The helper itself does not fall back to the raw text; callers decide that.
    const result = extractHashtags("#task");
    expect(result.cleanContent).toBe("");
    expect(result.hashtags).toEqual(["task"]);
  });

  it("collapses extra whitespace left by removed hashtags", () => {
    const result = extractHashtags("a #b c");
    expect(result.cleanContent).toBe("a c");
    expect(result.hashtags).toEqual(["b"]);
  });

  it("handles hashtags with underscores and digits", () => {
    const result = extractHashtags("note #tag_1 #item2");
    expect(result.cleanContent).toBe("note");
    expect(result.hashtags).toEqual(["tag_1", "item2"]);
  });
});
Loading