diff --git a/.claude/codebase/patterns.md b/.claude/codebase/patterns.md index e483d317..9750c78e 100644 --- a/.claude/codebase/patterns.md +++ b/.claude/codebase/patterns.md @@ -1,6 +1,6 @@ - + # Code Patterns & Conventions @@ -48,6 +48,11 @@ tea.Msg -> Update(msg) ## CRUD Patterns (internal/data/) +### Per-Entity Files +CRUD lives in `store_*.go` files (vendor, appliance, project, +quote, maintenance, incident, document, house, servicelog). store.go +holds shared open/close/migrate logic only. + ### Generic Helpers - listQuery[T](store, includeDeleted, prepare) - generic list with optional soft-delete scope - getByID[T](store, id, prepare) - generic single fetch @@ -73,6 +78,27 @@ tea.Msg -> Update(msg) ### Parent Alive Validation - requireParentAlive(model, id): checks if FK parent is alive/deleted/gone +## Sync & Oplog Patterns + +### Local Oplog +- GORM hooks on insert/update/delete write SyncOplogEntry rows +- syncableTable() gates which tables produce oplog entries +- isSyncApplying(tx) skips logging when applying remote ops (prevents push loop) +- WithSyncApplying(ctx) wraps a context to set that flag + +### Sync Engine (internal/sync/engine.go) +- Engine.Sync(ctx): push local ops, pull remote ops, transfer blobs +- Ops are encrypted client-side; relay never sees plaintext +- Per-op ULID prevents reordering ambiguity +- Conflict resolution: last-writer-wins on CreatedAt, DeviceID lex tiebreaker + +### Relay Store Access (rlsdb) +- ALL PgStore methods use s.rls.Tx(ctx, householdID, fn) for RLS scoping +- WithoutHousehold is reserved for the small set of non-RLS endpoints + (AutoMigrate, AuthenticateDevice, GetKeyExchangeResult, StartJoin, + HouseholdBySubscription, HouseholdByCustomer) +- Each WithoutHousehold call site requires a `// SAFETY:` comment + ## Form Lifecycle ``` StartAddForm()/StartEditForm() @@ -125,6 +151,17 @@ User edits (keyboard, calendar picker) - tab-N, row-N, col-N, hint-ID, dash-N - house-header, breadcrumb-back, overlay +## Error 
Handling + +### errorlint Strict +- Project enables errorlint with errorf + errorf-multi + comparison + asserts +- Wrap with `%w` (never `%v`) — 700+ fmt.Errorf calls, all use `%w` +- Sentinel comparisons use errors.Is, type-narrowing uses errors.As + +### Loops +- Use `slices.Backward` for reverse iteration (Go 1.23+) +- Use `reflect.Pointer` (not its old name `reflect.Ptr`) + ## Config Resolution 1. Defaults (constants in config.go) 2. TOML file (XDG_CONFIG_HOME/micasa/config.toml) diff --git a/.claude/codebase/structure.md b/.claude/codebase/structure.md index 37589dc0..e73ec036 100644 --- a/.claude/codebase/structure.md +++ b/.claude/codebase/structure.md @@ -1,31 +1,42 @@ - + # Project Structure -~65K lines of Go. Bubble Tea TUI for home maintenance tracking. SQLite backend. +~80K lines of Go. Bubble Tea TUI for home maintenance tracking with optional +multi-device sync via an encrypted relay server. SQLite backend (TUI), +Postgres backend (relay). ## Directory Layout ``` -cmd/micasa/main.go CLI entry (cobra). 
runOpts, backupOpts, newRootCmd +cmd/micasa/ CLI entry points (cobra + fang) + main.go Root cmd, runOpts/demoOpts/backupOpts + query.go, show.go Read-only inspection subcommands + mcp.go Launch MCP server subcommand + pro.go Pro/subscription entitlement + sync_config.go Sync configuration + theme.go TUI theme application + cliref.go CLI reference generation + internal/ app/ TUI package (~30K lines, largest package) model.go Model struct, Init/Update/View, key dispatch - types.go Enums (Mode, FormKind, TabKind), Tab/cell/columnSpec structs + types.go Enums (Mode, FormKind, TabKind), Tab/cell/columnSpec handlers.go TabHandler interface + per-entity implementations view.go Render pipeline: buildView -> baseView + overlays table.go Table rendering, viewport, header/row rendering - coldefs.go Column definitions: single source of truth (columnDef slices + columnSpecs funcs) - columns_generated.go Generated typed iota column constants (from coldefs.go via gencolumns) - tables.go Tab definitions, row builders, table helpers + coldefs.go Column definitions: single source of truth + columns_generated.go Generated typed iota column constants + tables.go Tab definitions, row builders forms.go Form definitions per entity, submitForm flow form_select.go Select/dropdown form fields form_filepicker.go File picker for document uploads styles.go Styles struct (singleton appStyles), Wong palette - chat.go LLM chat overlay, two-stage NL->SQL->summary + chat.go LLM chat overlay, NL->SQL->summary extraction.go Document extraction UI, step tracking + extraction_render.go Extraction UI rendering dashboard.go Dashboard overlay, metrics, nav entries calendar.go Modal date picker filter.go Pin-and-filter system (AND across cols, OR within) @@ -33,27 +44,36 @@ internal/ column_finder.go Fuzzy column search (/ key) fuzzy.go Generic fuzzy matching mouse.go Zone-based click dispatch - house.go House profile view (collapsed/expanded + ASCII art) + house.go, house_fields.go House profile view + 
field metadata undo.go Undo/redo stacks (max 50, snapshot-based) mag.go Order-of-magnitude easter egg (m key) compact.go Column width optimization collapse.go Column hiding stream.go Streaming utilities docopen.go Open documents in external viewer - testmain_test.go Global test setup, template DB - model_with_store_test.go newTestModelWithStore(t) - model_with_demo_data_test.go newTestModelWithDemoData(t, seed) - data/ Persistence layer - store.go Store struct (gorm.DB), all CRUD, soft-delete, seeding - models.go GORM entity structs (14 models) - query.go Schema inspection, ReadOnlyQuery, DataDump for LLM + chat_render.go Chat overlay rendering + notes.go Notes preview overlay + detail.go Detail view drill-down + inline_edit.go Inline cell editing dispatch + model_tabs.go Tab construction + cmd/gencolumns/ Code generator for columns_generated.go + + data/ Persistence layer (SQLite) + store.go Store struct (gorm.DB), open/close/migrate + store_*.go Per-entity CRUD (vendor, appliance, project, ...) + store_seed.go Seed lookups + store_hard_delete.go Hard-delete operations (compaction) + models.go GORM entity structs (16 models incl. 
SyncOplogEntry, SyncDevice) + oplog.go Sync oplog: hooks, syncableTable, WithSyncApplying + fts.go Full-text search (SQLite FTS5) + defaults.go Default-tag reflection helper (StructDefault) + query.go Schema inspection, ReadOnlyQuery, DataDump backup.go SQLite Online Backup API dashboard.go Dashboard queries (overdue, upcoming, spending) validation.go Date/int/float/interval parsing settings.go Key-value store, chat history errors.go hintError, FieldError, sentinels - meta.go go:generate directive - meta_generated.go Table*/Col* constants (generated) + meta.go, meta_generated.go Table*/Col* constants (generated) entity_context.go Entity names for LLM entity_rows.go (id, name) tuples for LLM FK resolution doccache.go Document cache (XDG_CACHE_HOME) @@ -61,43 +81,89 @@ internal/ units.go UnitSystem (metric/imperial) seed_scaled.go Scaled demo data (N years) ddl.go Table DDL retrieval + migrate.go Migration orchestration sqlite/ Custom SQLite dialect (inlined from glebarez/sqlite) sqlite.go Dialect, PRAGMA connector, type mapping ddlmod.go DDL parsing & manipulation - migrator.go GORM Migrator override (table recreation for ALTER) - cmd/genmeta/main.go Code generator for meta_generated.go - app/cmd/gencolumns/main.go Code generator for columns_generated.go (from coldefs.go) + migrator.go GORM Migrator override + cmd/genmeta/ Code generator for meta_generated.go + config/ TOML config with env var overrides config.go Config struct, Load(), provider auto-detect bytesize.go ByteSize custom type ("50 MiB") duration.go Duration custom type ("30d") show.go Config display/dump + query.go Config inspection helpers + cmd/ Config-related code generators + extract/ Document extraction pipeline - extractor.go Extractor interface + extractor.go Extractor interface, PDFTextExtractor, PDFOCRExtractor pipeline.go Pipeline orchestration (text -> OCR -> LLM) text.go pdftotext extraction ocr.go tesseract OCR (parallel image acquisition) + ocr_progress.go OCR progress tracking 
(toolPDFToCairo, toolTesseract) llmextract.go LLM-powered structured extraction operations.go INSERT/UPDATE/DELETE operation types shadow.go Shadow DB for staging - sqlcontext.go Schema context for LLM - tools.go Tool availability checks - ocr_progress.go OCR progress tracking + sqlcontext.go ExtractionTableDefs: schema for LLM + tools.go Tool availability checks (pdftocairo, tesseract, pdftotext) + llm/ LLM interfaces and any-llm-go client provider.go Base, ChatProvider, ExtractionProvider interfaces - client.go Client wrapping any-llm-go (satisfies both interfaces) + client.go Client wrapping any-llm-go; provider constants prompt.go System prompts (SQL gen, summary, fallback) - sqlfmt.go SQL formatting + claudecli/ Claude CLI subprocess backend client.go Client implementing ExtractionProvider via claude binary - client_test.go Tests with TestHelperProcess mock + + sync/ Multi-device sync (encrypted oplog) + types.go Envelope, PushRequest, Household, BlobStorage, ... + client.go Client: HTTP push/pull against relay + engine.go Engine.Sync orchestrates push+pull+blob transfer + apply.go ApplyOps with LWW conflict resolution + household.go Household creation, key exchange flow + blob.go Blob (binary) upload/download + + relay/ Relay server (Postgres backend, encrypted ops) + handler.go HTTP handlers (push, pull, invite, blob, ...) 
+ store.go Store interface (24 methods); constants + pgstore.go Postgres implementation; lockForUpdate const + memstore.go In-memory implementation (tests) + blob.go Blob storage abstraction + tokencrypt.go Token encryption for invite codes + stripe.go Stripe webhook handling + rlsdb/ Row-level security DB wrapper + DB.Tx(ctx, householdID, fn) enforces RLS + + crypto/ Cryptographic primitives + keys.go HouseholdKey, DeviceKeyPair, key persistence + encrypt.go Encrypt/Decrypt (NaCl secretbox + key derivation) + box.go BoxSeal/BoxOpen (NaCl box for key exchange) + token.go Device bearer token persistence + + mcp/ MCP (Model Context Protocol) server + server.go Server struct, stdio JSON-RPC loop + tools.go MCP tools (query, get_schema, search_documents, ...) + fake/ Demo data generator fake.go HomeFaker (seeded gofakeit) words.go Word lists + ollama/ Ollama model pull API pull.go PullModel(), PullScanner (streaming) + locale/ Currency formatting currency.go Currency type, FormatMoney, ParseMoney + + sqlfmt/ SQL pretty-printer (extracted from llm) + sqlfmt.go FormatSQL: tokenizer + layout + wrap + + address/ Postal code -> city/state lookup + lookup.go Lookup() against zippopotam.us + + uid/ ULID wrapper + uid.go New(), IsValid() (oklog/ulid v2) + safeconv/ Safe int64->int narrowing narrow.go Int() with overflow check ``` @@ -107,21 +173,28 @@ internal/ - `flake.nix` - Nix build (buildGoModule), dev shell, pre-commit hooks - `nix/module.nix` - NixOS module - `.github/workflows/ci.yml` - Multi-OS matrix (Ubuntu x86/ARM, macOS, Windows) -- `.golangci.yml` - Linter config (exhaustive, wrapcheck, goconst min 5, etc.) 
-- `go.mod` - Go 1.25.5, key deps: bubbletea/lipgloss/huh, gorm+modernc sqlite, any-llm-go +- `.golangci.yml` - Linter config: `default: all` with errorlint strict + (errorf + errorf-multi + comparison + asserts), goconst/gomodguard + disabled +- `go.mod` - Go 1.25.5; deps include bubbletea v2 / lipgloss v2 / huh v2, + gorm + modernc sqlite, any-llm-go, modelcontextprotocol/go-sdk - `docs/` - Hugo site (guides, reference, blog) - `plans/` - Design documents (committed to repo) ## Key Dependencies -- `charmbracelet/bubbletea` - TUI framework -- `charmbracelet/lipgloss` - Styling -- `charmbracelet/huh` - Form components -- `charmbracelet/bubbles` - Table, viewport, textinput, spinner +- `charm.land/bubbletea/v2` - TUI framework (v2 series) +- `charm.land/lipgloss/v2` - Styling +- `charm.land/huh/v2` - Form components +- `charm.land/bubbles/v2` - Table, viewport, textinput, spinner +- `charm.land/fang/v2` - cobra wrapper with styling - `gorm.io/gorm` + `modernc.org/sqlite` - Pure Go SQLite +- `gorm.io/driver/postgres` - Postgres driver (relay only) - `mozilla-ai/any-llm-go` - Multi-provider LLM client -- `lrstanley/bubblezone` - Mouse zone tracking -- `rmhubbert/bubbletea-overlay` - Overlay compositing +- `modelcontextprotocol/go-sdk` - MCP server +- `lrstanley/bubblezone/v2` - Mouse zone tracking +- `oklog/ulid/v2` - ULIDs - `brianvoe/gofakeit` - Random data generation - `spf13/cobra` - CLI parsing - `stretchr/testify` - Test assertions +- `stripe/stripe-go` - Stripe webhooks (relay) diff --git a/.claude/codebase/types.md b/.claude/codebase/types.md index 64b0b9b1..c02737fe 100644 --- a/.claude/codebase/types.md +++ b/.claude/codebase/types.md @@ -1,6 +1,6 @@ - + # Key Types & Interfaces @@ -53,7 +53,7 @@ Central Bubbletea state. 
Key fields: - Snapshot(store, id) (undoEntry, error) - SyncFixedValues(store, specs) -8 implementations (one per entity tab; formHouse is handled separately): +8 implementations (one per entity tab; formHouse handled separately): projectHandler, quoteHandler, maintenanceHandler, incidentHandler, applianceHandler, serviceLogHandler, vendorHandler, documentHandler @@ -79,8 +79,9 @@ Parent info (tab, entity, ID), Breadcrumb string, Tab (the detail sub-tab) ### Store (store.go) - db *gorm.DB, maxDocumentSize uint64, currency locale.Currency - Key methods: Open, Close, Transaction, AutoMigrate, Backup, IsMicasaDB +- Per-entity CRUD is split across store_*.go files -### Entity Models (models.go) - all have ID uint, CreatedAt, UpdatedAt +### Entity Models (models.go) - all have ID + CreatedAt + UpdatedAt unless noted - HouseProfile - address, year built, property details, insurance, HOA - ProjectType - lookup table (not soft-deletable) - Project - title, ProjectTypeID, status, dates, budget/actual (cents) @@ -95,6 +96,14 @@ Parent info (tab, entity, ID), Breadcrumb string, Tab (the detail sub-tab) - DeletionRecord - audit trail (Entity, TargetID, DeletedAt, RestoredAt) - Setting - key-value store - ChatInput - persistent chat history +- SyncOplogEntry - ULID id, TableName, RowID, OpType, Payload (JSON), + DeviceID, CreatedAt, AppliedAt?, SyncedAt? 
+- SyncDevice - this device's identity (ID, Name, HouseholdID, RelayURL, LastSeq) + +### Sync Oplog Hooks (oplog.go) +- syncApplyingKey: context key suppressing oplog writes when applying remote ops +- WithSyncApplying(ctx): mark ctx as remote-apply (prevents push loop) +- syncableTable(table): true for sync-eligible tables ### Project Status Constants ProjectStatusIdeating, Planned, Quoted, InProgress, Delayed, Completed, Abandoned @@ -116,7 +125,8 @@ SeasonSpring, SeasonSummer, SeasonFall, SeasonWinter ### Generated Constants (meta_generated.go) Table* (e.g., TableVendors = "vendors") -Col* (e.g., ColID = "id", ColName = "name", ColDeletedAt = "deleted_at") +Col* (e.g., ColID = "id", ColName = "name", ColDeletedAt = "deleted_at", + ColOpType, ColPayload, ColAppliedAt, ColSyncedAt, ColTableName, ColRowID, ColDeviceID, ColCreatedAt) ## Config Types (internal/config/) @@ -129,6 +139,7 @@ Col* (e.g., ColID = "id", ColName = "name", ColDeletedAt = "deleted_at") - OCRTSV (Enable *bool, ConfidenceThreshold *int) - Documents (MaxFileSize ByteSize, CacheTTL Duration) - Locale (Currency string) +- Sync (Enable *bool, RelayURL, etc.) Each pipeline section is self-contained; no cross-section inheritance. ### Defaults @@ -140,15 +151,20 @@ Each pipeline section is self-contained; no cross-section inheritance. ## LLM Types (internal/llm/) ### Interfaces (provider.go) -- Base: shared model management (Model, SetModel, Ping, ListModels, Timeout, etc.) -- ChatProvider: Base + ChatStream(ctx, messages) -- chat pipeline -- ExtractionProvider: Base + ExtractStream(ctx, messages, schema) -- extraction pipeline +- Base: shared model management (Model, SetModel, Ping, ListModels, Timeout, ...) 
+- ChatProvider: Base + ChatStream(ctx, messages) +- ExtractionProvider: Base + ExtractStream(ctx, messages, schema) + +### Provider Constants (client.go) +providerOllama, providerLlamacpp, providerLlamafile, providerAnthropic, +providerOpenAI, providerOpenRouter, providerDeepseek, providerGemini, +providerGroq, providerMistral ### Client (client.go) - Wraps any-llm-go provider, satisfies both ChatProvider and ExtractionProvider - ChatStream: streaming text responses (NL->SQL, summaries) - ExtractStream: streaming JSON schema-constrained responses -- SetThinking(level), Model(), ProviderName(), BaseURL() +- SetEffort(level), Model(), ProviderName(), BaseURL(), IsLocalServer() ### claudecli.Client (internal/claudecli/) - Implements ExtractionProvider by shelling out to claude CLI binary @@ -156,9 +172,122 @@ Each pipeline section is self-contained; no cross-section inheritance. - Uses cmdFactory DI for testability (TestHelperProcess re-exec pattern) - Flags: --tools "" --disable-slash-commands --no-chrome --setting-sources local -### Extract Types (internal/extract/) -- Extractor interface: Extract(ctx, data, mime) ([]TextSource, error) -- Pipeline: orchestrates extractors + optional LLM +## Extract Types (internal/extract/) + +### Extractor (extractor.go) +- Interface: Matches(mime), Extract(ctx, data, mime) ([]TextSource, error) +- Implementations: PDFTextExtractor (pdftotext), PDFOCRExtractor (tesseract) +- Tool name constants: toolPDFToCairo, toolTesseract (ocr_progress.go) + +### Pipeline (pipeline.go) +- Orchestrates extractors + optional LLM - TextSource: Tool, Desc, Text, Data - Operation: entity operation (INSERT/UPDATE/DELETE) - Result: Sources, Operations, LLMRaw, LLMUsed, Err + +### Schema Context (sqlcontext.go) +- ExtractionTableDefs: single source of truth for extraction table metadata +- Columns derived from generated meta via columnsFromMeta +- Actions, Required, Enum, Omit, synthetic columns: hand-maintained + +### Constants +MIMEApplicationPDF = 
"application/pdf" + +## Sync Types (internal/sync/) + +### Envelope (types.go) +Encrypted payload moved between client and relay. Fields: HouseholdID, +DeviceID, OpID, TableName, RowID, OpType, Ciphertext, Nonce, Seq?, CreatedAt. + +### PushRequest / PushResponse / PushConfirmation +Push: client -> relay; relay assigns sequence numbers, returns confirmations. + +### PullResponse +relay -> client; encrypted envelopes plus a more-pages flag. + +### Household (types.go) +ID, OwnerDeviceID, CreatedAt, StripeCustomerID?, StripeSubscriptionID?, StripeStatus? + +### BlobStorage (types.go) +Used and quota counters for a household. + +### Client (client.go) +HTTP client against the relay (Push, Pull, etc.). NewClient(baseURL, token, key); +NewManagementClient for admin-only flows. + +### Engine (engine.go) +Engine.Sync(ctx) orchestrates pushAll + pullAll + uploadPendingBlobs + fetchPendingBlobs. +Returns SyncResult (pushed/pulled/conflicts/blobs). + +### OpPayload (client.go) +Decrypted-side view of a sync op: ID, TableName, RowID, OpType, Payload, DeviceID, CreatedAt. + +### DecryptedOp (client.go) +Pull-side: Envelope + decoded OpPayload. + +### Apply (apply.go) +ApplyOps(ctx, db, ops): applies decrypted ops with LWW conflict resolution. 
+- applyInsert / applyUpdate / applyDelete / applyRestore +- lwwLocalWins: compare CreatedAt; ties broken by DeviceID lex order +- recordAppliedOp / recordUnappliedOp: write sync_oplog_entries row + +## Relay Types (internal/relay/) + +### Store interface (store.go) - 24 methods +- Push / Pull (encrypted ops) +- CreateHousehold / RegisterDevice / AuthenticateDevice +- CreateInvite / StartJoin / GetPendingExchanges / CompleteKeyExchange / GetKeyExchangeResult +- ListDevices / RevokeDevice +- GetHousehold / UpdateSubscription / HouseholdBySubscription / UpdateCustomerID / HouseholdByCustomer +- OpsCount / PutBlob / GetBlob / HasBlob / BlobUsage +- SetEncryptionKey / Close + +### Implementations +- PgStore (pgstore.go): Postgres via GORM + rlsdb.DB.Tx for row-level security +- MemStore (memstore.go): in-memory for tests + +### Constants +- maxInviteAttempts = 5, maxActiveInvites = 3 +- inviteExpiry = 4h, keyExchangeExpiry = 15m +- lockForUpdate = "UPDATE" (GORM clause.Locking.Strength) + +### Handler (handler.go) +HTTP handlers wiring Store to bearer-token-auth routes. ServeHTTP delegates +to internal http.ServeMux. + +### rlsdb (rlsdb/) - row-level security wrapper +- DB struct, DB.Tx(ctx, householdID, fn): runs fn within a transaction with + Postgres session settings that enforce RLS on the household's data +- Unexported *gorm.DB structurally prevents bypass from relay package + +## Crypto Types (internal/crypto/) + +### HouseholdKey [KeySize]byte +Symmetric key for AEAD. String() returns "[REDACTED]" to prevent leaks. +- GenerateHouseholdKey(), SaveHouseholdKey, LoadHouseholdKey + +### DeviceKeyPair +Curve25519 public/private pair for box encryption (key exchange). +- GenerateDeviceKeyPair(), SaveDeviceKeyPair, LoadDeviceKeyPair + +### Encrypt / Decrypt (encrypt.go) +NaCl secretbox with random 24-byte nonces. + +### BoxSeal / BoxOpen (box.go) +NaCl box (authenticated public-key) for key exchange. 
+ +### SecretsDir(), SaveDeviceToken / LoadDeviceToken (token.go) +Bearer token persistence with restrictive file perms. + +## MCP Types (internal/mcp/) + +### Server (server.go) +- Server struct wrapping *data.Store +- Serve(ctx, stdin, stdout): stdio JSON-RPC loop, MCP protocol + +### Tools (tools.go) +- query: arbitrary SELECT against the local DB (ReadOnlyQuery enforced) +- get_schema: table list + columns +- search_documents: filter by entity, date range, MIME, full-text +- get_maintenance_schedule: due/overdue items +- get_house_profile: structured house metadata diff --git a/.golangci.yml b/.golangci.yml index 5631786b..d8e9505a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -49,6 +49,16 @@ linters: - gomodguard_v2 # explicit v2 opt-in (default: all enables the deprecated v1 alias) settings: + errorlint: + # Catch direct comparison of errors (require errors.Is). + comparison: true + # Catch type assertions on errors (require errors.As). + asserts: true + # Catch fmt.Errorf without %w when wrapping an error. + errorf: true + # Allow multiple %w verbs in a single fmt.Errorf (Go 1.20+). 
+ errorf-multi: true + exhaustive: default-signifies-exhaustive: false diff --git a/internal/extract/llmextract_test.go b/internal/extract/llmextract_test.go index 962295cc..1c6b5bbb 100644 --- a/internal/extract/llmextract_test.go +++ b/internal/extract/llmextract_test.go @@ -28,7 +28,7 @@ func TestBuildExtractionPrompt(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "42", Filename: "invoice.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SizeBytes: 12345, Schema: schema, Sources: []TextSource{ @@ -62,7 +62,7 @@ func TestBuildExtractionPrompt_DualSources(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "mixed.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, Sources: []TextSource{ {Tool: "pdftotext", Desc: "Digital text.", Text: "Digital text from pages 1-2"}, {Tool: "tesseract", Desc: "OCR text.", Text: "OCR text from page 3"}, @@ -82,7 +82,7 @@ func TestBuildExtractionPrompt_OCROnly(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, Sources: []TextSource{ {Tool: "tesseract", Desc: "OCR text.", Text: "OCR text from all pages"}, }, @@ -113,7 +113,7 @@ func TestBuildExtractionPrompt_EmptyDocID(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "", Filename: "new.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, Sources: []TextSource{ {Tool: "pdftotext", Text: "Some text"}, }, @@ -129,7 +129,7 @@ func TestBuildExtractionPrompt_NonZeroDocID(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "42", Filename: "existing.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, Sources: []TextSource{ {Tool: "pdftotext", Text: "Some text"}, }, @@ -189,7 +189,7 @@ func TestBuildExtractionPrompt_ContainsDomainHints(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: 
"test.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, Schema: SchemaContext{ DDL: map[string]string{ data.TableVendors: "CREATE TABLE `vendors` (`id` integer)", @@ -247,7 +247,7 @@ func TestBuildExtractionPrompt_OmitsSchemaRedundantSections(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "test.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, Schema: SchemaContext{ DDL: map[string]string{ data.TableVendors: "CREATE TABLE `vendors` (`id` integer)", @@ -276,7 +276,7 @@ func TestBuildExtractionPrompt_SpatialSentWhenEnabled(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SendTSV: true, ConfThreshold: DefaultOCRConfThreshold, Sources: []TextSource{ @@ -300,7 +300,7 @@ func TestBuildExtractionPrompt_SpatialNotSentByDefault(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, // SendTSV defaults to false. 
Sources: []TextSource{ {Tool: "tesseract", Desc: "OCR text.", Text: "Invoice #1042", Data: []byte(sampleTSV)}, @@ -320,7 +320,7 @@ func TestBuildExtractionPrompt_SpatialMixedSources(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "mixed.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SendTSV: true, ConfThreshold: DefaultOCRConfThreshold, Sources: []TextSource{ @@ -345,7 +345,7 @@ func TestBuildExtractionPrompt_TSVColumnHintIncluded(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SendTSV: true, ConfThreshold: DefaultOCRConfThreshold, Sources: []TextSource{ @@ -367,7 +367,7 @@ func TestBuildExtractionPrompt_TSVSourceWithoutData(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SendTSV: true, ConfThreshold: DefaultOCRConfThreshold, Sources: []TextSource{ @@ -387,7 +387,7 @@ func TestBuildExtractionPrompt_SpatialFallbackOnEmptyTSV(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SendTSV: true, ConfThreshold: DefaultOCRConfThreshold, Sources: []TextSource{ @@ -413,7 +413,7 @@ func TestBuildExtractionPrompt_TSVPreambleMentionsSpatial(t *testing.T) { msgs := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SendTSV: true, ConfThreshold: DefaultOCRConfThreshold, Sources: []TextSource{ @@ -434,7 +434,7 @@ func TestBuildExtractionPrompt_ConfThresholdThreaded(t *testing.T) { msgsHigh := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SendTSV: true, ConfThreshold: 96, Sources: []TextSource{ @@ -448,7 +448,7 @@ func 
TestBuildExtractionPrompt_ConfThresholdThreaded(t *testing.T) { msgsLow := BuildExtractionPrompt(ExtractionPromptInput{ DocID: "1", Filename: "scan.pdf", - MIME: "application/pdf", + MIME: MIMEApplicationPDF, SendTSV: true, ConfThreshold: 70, Sources: []TextSource{