From c0d529226bda4f1f4315fc27d43f73e886cdbe1f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 24 Jun 2026 15:51:49 -0500 Subject: [PATCH 1/2] Avoid mixed SQLite access during TUI startup Issue #401 shows TUI startup rebuilding the analytics cache while another writer is active, with DuckDB and go-sqlite3 both in the crash stack. The TUI should not overlap its long-lived Store or background FTS maintenance with DuckDB sqlite_scanner access to the same database file. Run the automatic cache rebuild before opening the TUI Store, start background FTS only after the query engine is initialized, and default the interactive DuckDB engine to the direct SQLite fallback for detail reads. This matches the daemon-side constraint that keeps DuckDB's bundled SQLite from attaching a live msgvault database inside a process that also owns go-sqlite3 connections. Generated with Codex Co-authored-by: Codex --- cmd/msgvault/cmd/tui.go | 40 ++++++++++++++++++++++++++---------- cmd/msgvault/cmd/tui_test.go | 14 +++++++++++++ 2 files changed, 43 insertions(+), 11 deletions(-) create mode 100644 cmd/msgvault/cmd/tui_test.go diff --git a/cmd/msgvault/cmd/tui.go b/cmd/msgvault/cmd/tui.go index 2fa2ef88d..ba978cafa 100644 --- a/cmd/msgvault/cmd/tui.go +++ b/cmd/msgvault/cmd/tui.go @@ -81,6 +81,22 @@ Remote Mode: } else { // Local mode - use local database dbPath := cfg.DatabaseDSN() + analyticsDir := cfg.AnalyticsDir() + + if !store.IsPostgresURL(dbPath) && !forceSQL && !skipCacheBuild { + staleness := cacheNeedsBuild(dbPath, analyticsDir) + if staleness.NeedsBuild { + fmt.Printf("Building analytics cache (%s)...\n", staleness.Reason) + result, err := buildCache(dbPath, analyticsDir, staleness.FullRebuild) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: Failed to build cache: %v\n", err) + fmt.Fprintf(os.Stderr, "Falling back to SQLite (may be slow for large archives)\n") + } else if !result.Skipped { + fmt.Printf("Cached %d messages for fast queries.\n", result.ExportedCount) + } + } + } + s, err := store.Open(dbPath) if err != nil { return fmt.Errorf("open database: %w", err) @@ -95,16 +111,6 @@ Remote Mode: return fmt.Errorf("startup migrations: %w", err) } - // Build FTS index in background — TUI uses DuckDB/Parquet for - // aggregates and only needs FTS for deep search (Tab to switch). - if s.NeedsFTSBackfill() { - go func() { - _, _ = s.BackfillFTS(nil) - }() - } - - analyticsDir := cfg.AnalyticsDir() - // The Parquet analytics cache is a SQLite → DuckDB ETL and // has no meaning when the system of record is PostgreSQL — // buildCache feeds the DSN to the SQLite driver and @@ -132,7 +138,7 @@ Remote Mode: // Determine query engine to use if !forceSQL && query.HasCompleteParquetData(analyticsDir) { // Use DuckDB for fast Parquet queries - var duckOpts query.DuckDBOptions + duckOpts := tuiDuckDBOptions() if noSQLiteScanner { duckOpts.DisableSQLiteScanner = true } @@ -154,6 +160,14 @@ Remote Mode: engine = query.NewEngine(s.DB(), false) } } + + // Build FTS index in background after cache/engine startup. The + // aggregate TUI path uses Parquet; FTS is only needed for deep search. + if s.NeedsFTSBackfill() { + go func() { + _, _ = s.BackfillFTS(nil) + }() + } } // Check if engine supports text queries @@ -201,6 +215,10 @@ type cacheStaleness struct { Reason string } +func tuiDuckDBOptions() query.DuckDBOptions { + return query.DuckDBOptions{DisableSQLiteScanner: true} +} + // cacheNeedsBuild checks if the analytics cache needs to be built or // updated. Collects all staleness signals before returning so that // e.g. a mixed add+delete sync correctly reports both. diff --git a/cmd/msgvault/cmd/tui_test.go b/cmd/msgvault/cmd/tui_test.go new file mode 100644 index 000000000..31122d095 --- /dev/null +++ b/cmd/msgvault/cmd/tui_test.go @@ -0,0 +1,14 @@ +package cmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTUIDuckDBOptionsDisableSQLiteScannerByDefault(t *testing.T) { + opts := tuiDuckDBOptions() + + assert.True(t, opts.DisableSQLiteScanner, + "TUI must not attach the live SQLite DB through DuckDB sqlite_scanner") +} From a28897d8f2909cb77488002a8c5ca03adc9ab1da Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 24 Jun 2026 22:03:01 -0500 Subject: [PATCH 2/2] Run TUI cache build after migrations The first pass for issue #401 moved automatic cache export before the long-lived TUI Store existed, but it also moved the export ahead of InitSchema/runStartupMigrations and left a second post-open build block behind. That could fail on upgraded SQLite databases whose cache-export columns are added by startup migrations, or retry the DuckDB export while the TUI Store was already open. Keep the mixed-SQLite avoidance invariant by using a short-lived migrated store first, closing it before any cache export, and reopening the migrated store for the interactive TUI. The TUI now has one cache-build point, after migrations and before the long-lived Store is used by the query engine. Generated with Codex Co-authored-by: Codex --- cmd/msgvault/cmd/tui.go | 86 ++++++++++--------- cmd/msgvault/cmd/tui_test.go | 161 +++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+), 39 deletions(-) diff --git a/cmd/msgvault/cmd/tui.go b/cmd/msgvault/cmd/tui.go index ba978cafa..eaa607005 100644 --- a/cmd/msgvault/cmd/tui.go +++ b/cmd/msgvault/cmd/tui.go @@ -83,34 +83,12 @@ Remote Mode: dbPath := cfg.DatabaseDSN() analyticsDir := cfg.AnalyticsDir() - if !store.IsPostgresURL(dbPath) && !forceSQL && !skipCacheBuild { - staleness := cacheNeedsBuild(dbPath, analyticsDir) - if staleness.NeedsBuild { - fmt.Printf("Building analytics cache (%s)...\n", staleness.Reason) - result, err := buildCache(dbPath, analyticsDir, staleness.FullRebuild) - if err != nil { - fmt.Fprintf(os.Stderr, "Warning: Failed to build cache: %v\n", err) - fmt.Fprintf(os.Stderr, "Falling back to SQLite (may be slow for large archives)\n") - } else if !result.Skipped { - fmt.Printf("Cached %d messages for fast queries.\n", result.ExportedCount) - } - } - } - - s, err := store.Open(dbPath) + s, err := openLocalTUIStore(dbPath, analyticsDir) if err != nil { - return fmt.Errorf("open database: %w", err) + return err } defer func() { _ = s.Close() }() - // Ensure schema is up to date - if err := s.InitSchema(); err != nil { - return fmt.Errorf("init schema: %w", err) - } - if err := runStartupMigrations(s); err != nil { - return fmt.Errorf("startup migrations: %w", err) - } - // The Parquet analytics cache is a SQLite → DuckDB ETL and // has no meaning when the system of record is PostgreSQL — // buildCache feeds the DSN to the SQLite driver and @@ -120,21 +98,6 @@ Remote Mode: if s.IsPostgreSQL() { engine = query.NewEngine(s.DB(), true) } else { - // Check if cache needs to be built/updated (unless forcing SQL or skipping) - if !forceSQL && !skipCacheBuild { - staleness := cacheNeedsBuild(dbPath, analyticsDir) - if staleness.NeedsBuild { - fmt.Printf("Building analytics cache (%s)...\n", staleness.Reason) - result, err := buildCache(dbPath, analyticsDir, staleness.FullRebuild) - if err != nil { - fmt.Fprintf(os.Stderr, "Warning: Failed to build cache: %v\n", err) - fmt.Fprintf(os.Stderr, "Falling back to SQLite (may be slow for large archives)\n") - } else if !result.Skipped { - fmt.Printf("Cached %d messages for fast queries.\n", result.ExportedCount) - } - } - } - // Determine query engine to use if !forceSQL && query.HasCompleteParquetData(analyticsDir) { // Use DuckDB for fast Parquet queries @@ -205,6 +168,51 @@ Remote Mode: }, } +func openLocalTUIStore(dbPath, analyticsDir string) (*store.Store, error) { + s, err := openMigratedLocalStore(dbPath) + if err != nil { + return nil, err + } + + if s.IsPostgreSQL() || forceSQL || skipCacheBuild { + return s, nil + } + + if err := s.Close(); err != nil { + return nil, fmt.Errorf("close database before cache build: %w", err) + } + + staleness := cacheNeedsBuild(dbPath, analyticsDir) + if staleness.NeedsBuild { + fmt.Printf("Building analytics cache (%s)...\n", staleness.Reason) + result, err := buildCache(dbPath, analyticsDir, staleness.FullRebuild) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: Failed to build cache: %v\n", err) + fmt.Fprintf(os.Stderr, "Falling back to SQLite (may be slow for large archives)\n") + } else if !result.Skipped { + fmt.Printf("Cached %d messages for fast queries.\n", result.ExportedCount) + } + } + + return openMigratedLocalStore(dbPath) +} + +func openMigratedLocalStore(dbPath string) (*store.Store, error) { + s, err := store.Open(dbPath) + if err != nil { + return nil, fmt.Errorf("open database: %w", err) + } + if err := s.InitSchema(); err != nil { + _ = s.Close() + return nil, fmt.Errorf("init schema: %w", err) + } + if err := runStartupMigrations(s); err != nil { + _ = s.Close() + return nil, fmt.Errorf("startup migrations: %w", err) + } + return s, nil +} + // cacheStaleness describes why the analytics cache needs a rebuild. type cacheStaleness struct { NeedsBuild bool diff --git a/cmd/msgvault/cmd/tui_test.go b/cmd/msgvault/cmd/tui_test.go index 31122d095..76dd9bf7a 100644 --- a/cmd/msgvault/cmd/tui_test.go +++ b/cmd/msgvault/cmd/tui_test.go @@ -1,9 +1,15 @@ package cmd import ( + "database/sql" + "path/filepath" "testing" + _ "github.com/mattn/go-sqlite3" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.kenn.io/msgvault/internal/config" + "go.kenn.io/msgvault/internal/query" ) func TestTUIDuckDBOptionsDisableSQLiteScannerByDefault(t *testing.T) { @@ -12,3 +18,158 @@ func TestTUIDuckDBOptionsDisableSQLiteScannerByDefault(t *testing.T) { assert.True(t, opts.DisableSQLiteScanner, "TUI must not attach the live SQLite DB through DuckDB sqlite_scanner") } + +func TestOpenLocalTUIStoreMigratesBeforeCacheBuild(t *testing.T) { + savedCfg := cfg + savedForceSQL := forceSQL + savedSkipCacheBuild := skipCacheBuild + t.Cleanup(func() { + cfg = savedCfg + forceSQL = savedForceSQL + skipCacheBuild = savedSkipCacheBuild + }) + cfg = &config.Config{} + forceSQL = false + skipCacheBuild = false + + tmpDir := t.TempDir() + dbPath := filepath.Join(tmpDir, "legacy.db") + analyticsDir := filepath.Join(tmpDir, "analytics") + setupLegacyTUIDBMissingDeletedAt(t, dbPath) + + st, err := openLocalTUIStore(dbPath, analyticsDir) + require.NoError(t, err, "openLocalTUIStore") + t.Cleanup(func() { _ = st.Close() }) + + assert.True(t, query.HasCompleteParquetData(analyticsDir), + "TUI startup should build cache after legacy columns are migrated") +} + +func setupLegacyTUIDBMissingDeletedAt(t *testing.T, dbPath string) { + t.Helper() + db, err := sql.Open("sqlite3", dbPath) + require.NoError(t, err, "open sqlite") + defer func() { _ = db.Close() }() + + _, err = db.Exec(` + CREATE TABLE sources ( + id INTEGER PRIMARY KEY, + source_type TEXT NOT NULL DEFAULT 'gmail', + identifier TEXT NOT NULL, + display_name TEXT, + google_user_id TEXT UNIQUE, + last_sync_at DATETIME, + sync_cursor TEXT, + sync_config JSON, + oauth_app TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, + UNIQUE(source_type, identifier) + ); + CREATE TABLE conversations ( + id INTEGER PRIMARY KEY, + source_id INTEGER NOT NULL, + source_conversation_id TEXT, + conversation_type TEXT NOT NULL DEFAULT 'email_thread', + title TEXT, + participant_count INTEGER DEFAULT 0, + message_count INTEGER DEFAULT 0, + unread_count INTEGER DEFAULT 0, + last_message_at DATETIME, + last_message_preview TEXT, + metadata JSON, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + CREATE TABLE participants ( + id INTEGER PRIMARY KEY, + email_address TEXT, + phone_number TEXT, + display_name TEXT, + domain TEXT, + canonical_id TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + CREATE TABLE messages ( + id INTEGER PRIMARY KEY, + source_id INTEGER NOT NULL, + source_message_id TEXT NOT NULL, + conversation_id INTEGER, + subject TEXT, + snippet TEXT, + sent_at DATETIME, + received_at DATETIME, + internal_date DATETIME, + size_estimate INTEGER, + has_attachments BOOLEAN DEFAULT FALSE, + is_from_me BOOLEAN DEFAULT FALSE, + archived_at DATETIME, + rfc822_message_id TEXT, + sender_id INTEGER, + message_type TEXT NOT NULL DEFAULT 'email', + attachment_count INTEGER DEFAULT 0, + deleted_from_source_at DATETIME, + UNIQUE(source_id, source_message_id) + ); + CREATE TABLE message_recipients ( + id INTEGER PRIMARY KEY, + message_id INTEGER NOT NULL, + participant_id INTEGER NOT NULL, + recipient_type TEXT NOT NULL, + display_name TEXT + ); + CREATE TABLE labels ( + id INTEGER PRIMARY KEY, + source_id INTEGER NOT NULL, + source_label_id TEXT, + name TEXT NOT NULL, + label_type TEXT + ); + CREATE TABLE message_labels ( + message_id INTEGER NOT NULL, + label_id INTEGER NOT NULL, + PRIMARY KEY (message_id, label_id) + ); + CREATE TABLE attachments ( + id INTEGER PRIMARY KEY, + message_id INTEGER NOT NULL, + filename TEXT, + mime_type TEXT, + size INTEGER, + content_hash TEXT, + storage_path TEXT NOT NULL DEFAULT '', + media_type TEXT, + width INTEGER, + height INTEGER, + duration_ms INTEGER, + thumbnail_hash TEXT, + thumbnail_path TEXT, + source_attachment_id TEXT, + attachment_metadata JSON, + encryption_version INTEGER DEFAULT 0, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + + INSERT INTO sources (id, source_type, identifier, display_name) + VALUES (1, 'gmail', 'test@example.com', 'Test Account'); + INSERT INTO conversations (id, source_id, source_conversation_id, title) + VALUES (1, 1, 'thread-1', 'Legacy Thread'); + INSERT INTO participants (id, email_address, domain, display_name) + VALUES (1, 'alice@example.com', 'example.com', 'Alice'), + (2, 'bob@example.com', 'example.com', 'Bob'); + INSERT INTO messages ( + id, source_id, source_message_id, conversation_id, subject, snippet, + sent_at, size_estimate, has_attachments + ) VALUES ( + 1, 1, 'msg-1', 1, 'Legacy message', 'Preview', + '2024-01-02 03:04:05', 1234, 0 + ); + INSERT INTO message_recipients (message_id, participant_id, recipient_type, display_name) + VALUES (1, 1, 'from', 'Alice'), (1, 2, 'to', 'Bob'); + INSERT INTO labels (id, source_id, source_label_id, name, label_type) + VALUES (1, 1, 'INBOX', 'INBOX', 'system'); + INSERT INTO message_labels (message_id, label_id) VALUES (1, 1); + `) + require.NoError(t, err, "create legacy TUI fixture") +}