diff --git a/cmd/msgvault/cmd/tui.go b/cmd/msgvault/cmd/tui.go index 2fa2ef88..eaa60700 100644 --- a/cmd/msgvault/cmd/tui.go +++ b/cmd/msgvault/cmd/tui.go @@ -81,30 +81,14 @@ Remote Mode: } else { // Local mode - use local database dbPath := cfg.DatabaseDSN() - s, err := store.Open(dbPath) + analyticsDir := cfg.AnalyticsDir() + + s, err := openLocalTUIStore(dbPath, analyticsDir) if err != nil { - return fmt.Errorf("open database: %w", err) + return err } defer func() { _ = s.Close() }() - // Ensure schema is up to date - if err := s.InitSchema(); err != nil { - return fmt.Errorf("init schema: %w", err) - } - if err := runStartupMigrations(s); err != nil { - return fmt.Errorf("startup migrations: %w", err) - } - - // Build FTS index in background — TUI uses DuckDB/Parquet for - // aggregates and only needs FTS for deep search (Tab to switch). - if s.NeedsFTSBackfill() { - go func() { - _, _ = s.BackfillFTS(nil) - }() - } - - analyticsDir := cfg.AnalyticsDir() - // The Parquet analytics cache is a SQLite → DuckDB ETL and // has no meaning when the system of record is PostgreSQL — // buildCache feeds the DSN to the SQLite driver and @@ -114,25 +98,10 @@ Remote Mode: if s.IsPostgreSQL() { engine = query.NewEngine(s.DB(), true) } else { - // Check if cache needs to be built/updated (unless forcing SQL or skipping) - if !forceSQL && !skipCacheBuild { - staleness := cacheNeedsBuild(dbPath, analyticsDir) - if staleness.NeedsBuild { - fmt.Printf("Building analytics cache (%s)...\n", staleness.Reason) - result, err := buildCache(dbPath, analyticsDir, staleness.FullRebuild) - if err != nil { - fmt.Fprintf(os.Stderr, "Warning: Failed to build cache: %v\n", err) - fmt.Fprintf(os.Stderr, "Falling back to SQLite (may be slow for large archives)\n") - } else if !result.Skipped { - fmt.Printf("Cached %d messages for fast queries.\n", result.ExportedCount) - } - } - } - // Determine query engine to use if !forceSQL && query.HasCompleteParquetData(analyticsDir) { // Use DuckDB for fast Parquet queries - var duckOpts query.DuckDBOptions + duckOpts := tuiDuckDBOptions() if noSQLiteScanner { duckOpts.DisableSQLiteScanner = true } @@ -154,6 +123,14 @@ Remote Mode: engine = query.NewEngine(s.DB(), false) } } + + // Build FTS index in background after cache/engine startup. The + // aggregate TUI path uses Parquet; FTS is only needed for deep search. + if s.NeedsFTSBackfill() { + go func() { + _, _ = s.BackfillFTS(nil) + }() + } } // Check if engine supports text queries @@ -191,6 +168,51 @@ Remote Mode: }, } +func openLocalTUIStore(dbPath, analyticsDir string) (*store.Store, error) { + s, err := openMigratedLocalStore(dbPath) + if err != nil { + return nil, err + } + + if s.IsPostgreSQL() || forceSQL || skipCacheBuild { + return s, nil + } + + if err := s.Close(); err != nil { + return nil, fmt.Errorf("close database before cache build: %w", err) + } + + staleness := cacheNeedsBuild(dbPath, analyticsDir) + if staleness.NeedsBuild { + fmt.Printf("Building analytics cache (%s)...\n", staleness.Reason) + result, err := buildCache(dbPath, analyticsDir, staleness.FullRebuild) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: Failed to build cache: %v\n", err) + fmt.Fprintf(os.Stderr, "Falling back to SQLite (may be slow for large archives)\n") + } else if !result.Skipped { + fmt.Printf("Cached %d messages for fast queries.\n", result.ExportedCount) + } + } + + return openMigratedLocalStore(dbPath) +} + +func openMigratedLocalStore(dbPath string) (*store.Store, error) { + s, err := store.Open(dbPath) + if err != nil { + return nil, fmt.Errorf("open database: %w", err) + } + if err := s.InitSchema(); err != nil { + _ = s.Close() + return nil, fmt.Errorf("init schema: %w", err) + } + if err := runStartupMigrations(s); err != nil { + _ = s.Close() + return nil, fmt.Errorf("startup migrations: %w", err) + } + return s, nil +} + // cacheStaleness describes why the analytics cache needs a rebuild. type cacheStaleness struct { NeedsBuild bool @@ -201,6 +223,10 @@ type cacheStaleness struct { Reason string } +func tuiDuckDBOptions() query.DuckDBOptions { + return query.DuckDBOptions{DisableSQLiteScanner: true} +} + // cacheNeedsBuild checks if the analytics cache needs to be built or // updated. Collects all staleness signals before returning so that // e.g. a mixed add+delete sync correctly reports both. diff --git a/cmd/msgvault/cmd/tui_test.go b/cmd/msgvault/cmd/tui_test.go new file mode 100644 index 00000000..76dd9bf7 --- /dev/null +++ b/cmd/msgvault/cmd/tui_test.go @@ -0,0 +1,175 @@ +package cmd + +import ( + "database/sql" + "path/filepath" + "testing" + + _ "github.com/mattn/go-sqlite3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/msgvault/internal/config" + "go.kenn.io/msgvault/internal/query" +) + +func TestTUIDuckDBOptionsDisableSQLiteScannerByDefault(t *testing.T) { + opts := tuiDuckDBOptions() + + assert.True(t, opts.DisableSQLiteScanner, + "TUI must not attach the live SQLite DB through DuckDB sqlite_scanner") +} + +func TestOpenLocalTUIStoreMigratesBeforeCacheBuild(t *testing.T) { + savedCfg := cfg + savedForceSQL := forceSQL + savedSkipCacheBuild := skipCacheBuild + t.Cleanup(func() { + cfg = savedCfg + forceSQL = savedForceSQL + skipCacheBuild = savedSkipCacheBuild + }) + cfg = &config.Config{} + forceSQL = false + skipCacheBuild = false + + tmpDir := t.TempDir() + dbPath := filepath.Join(tmpDir, "legacy.db") + analyticsDir := filepath.Join(tmpDir, "analytics") + setupLegacyTUIDBMissingDeletedAt(t, dbPath) + + st, err := openLocalTUIStore(dbPath, analyticsDir) + require.NoError(t, err, "openLocalTUIStore") + t.Cleanup(func() { _ = st.Close() }) + + assert.True(t, query.HasCompleteParquetData(analyticsDir), + "TUI startup should build cache after legacy columns are migrated") +} + +func setupLegacyTUIDBMissingDeletedAt(t *testing.T, dbPath string) { + t.Helper() + db, err := sql.Open("sqlite3", dbPath) + require.NoError(t, err, "open sqlite") + defer func() { _ = db.Close() }() + + _, err = db.Exec(` + CREATE TABLE sources ( + id INTEGER PRIMARY KEY, + source_type TEXT NOT NULL DEFAULT 'gmail', + identifier TEXT NOT NULL, + display_name TEXT, + google_user_id TEXT UNIQUE, + last_sync_at DATETIME, + sync_cursor TEXT, + sync_config JSON, + oauth_app TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, + UNIQUE(source_type, identifier) + ); + CREATE TABLE conversations ( + id INTEGER PRIMARY KEY, + source_id INTEGER NOT NULL, + source_conversation_id TEXT, + conversation_type TEXT NOT NULL DEFAULT 'email_thread', + title TEXT, + participant_count INTEGER DEFAULT 0, + message_count INTEGER DEFAULT 0, + unread_count INTEGER DEFAULT 0, + last_message_at DATETIME, + last_message_preview TEXT, + metadata JSON, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + CREATE TABLE participants ( + id INTEGER PRIMARY KEY, + email_address TEXT, + phone_number TEXT, + display_name TEXT, + domain TEXT, + canonical_id TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + CREATE TABLE messages ( + id INTEGER PRIMARY KEY, + source_id INTEGER NOT NULL, + source_message_id TEXT NOT NULL, + conversation_id INTEGER, + subject TEXT, + snippet TEXT, + sent_at DATETIME, + received_at DATETIME, + internal_date DATETIME, + size_estimate INTEGER, + has_attachments BOOLEAN DEFAULT FALSE, + is_from_me BOOLEAN DEFAULT FALSE, + archived_at DATETIME, + rfc822_message_id TEXT, + sender_id INTEGER, + message_type TEXT NOT NULL DEFAULT 'email', + attachment_count INTEGER DEFAULT 0, + deleted_from_source_at DATETIME, + UNIQUE(source_id, source_message_id) + ); + CREATE TABLE message_recipients ( + id INTEGER PRIMARY KEY, + message_id INTEGER NOT NULL, + participant_id INTEGER NOT NULL, + recipient_type TEXT NOT NULL, + display_name TEXT + ); + CREATE TABLE labels ( + id INTEGER PRIMARY KEY, + source_id INTEGER NOT NULL, + source_label_id TEXT, + name TEXT NOT NULL, + label_type TEXT + ); + CREATE TABLE message_labels ( + message_id INTEGER NOT NULL, + label_id INTEGER NOT NULL, + PRIMARY KEY (message_id, label_id) + ); + CREATE TABLE attachments ( + id INTEGER PRIMARY KEY, + message_id INTEGER NOT NULL, + filename TEXT, + mime_type TEXT, + size INTEGER, + content_hash TEXT, + storage_path TEXT NOT NULL DEFAULT '', + media_type TEXT, + width INTEGER, + height INTEGER, + duration_ms INTEGER, + thumbnail_hash TEXT, + thumbnail_path TEXT, + source_attachment_id TEXT, + attachment_metadata JSON, + encryption_version INTEGER DEFAULT 0, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + + INSERT INTO sources (id, source_type, identifier, display_name) + VALUES (1, 'gmail', 'test@example.com', 'Test Account'); + INSERT INTO conversations (id, source_id, source_conversation_id, title) + VALUES (1, 1, 'thread-1', 'Legacy Thread'); + INSERT INTO participants (id, email_address, domain, display_name) + VALUES (1, 'alice@example.com', 'example.com', 'Alice'), + (2, 'bob@example.com', 'example.com', 'Bob'); + INSERT INTO messages ( + id, source_id, source_message_id, conversation_id, subject, snippet, + sent_at, size_estimate, has_attachments + ) VALUES ( + 1, 1, 'msg-1', 1, 'Legacy message', 'Preview', + '2024-01-02 03:04:05', 1234, 0 + ); + INSERT INTO message_recipients (message_id, participant_id, recipient_type, display_name) + VALUES (1, 1, 'from', 'Alice'), (1, 2, 'to', 'Bob'); + INSERT INTO labels (id, source_id, source_label_id, name, label_type) + VALUES (1, 1, 'INBOX', 'INBOX', 'system'); + INSERT INTO message_labels (message_id, label_id) VALUES (1, 1); + `) + require.NoError(t, err, "create legacy TUI fixture") +}