Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 62 additions & 36 deletions cmd/msgvault/cmd/tui.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,30 +81,14 @@ Remote Mode:
} else {
// Local mode - use local database
dbPath := cfg.DatabaseDSN()
s, err := store.Open(dbPath)
analyticsDir := cfg.AnalyticsDir()

s, err := openLocalTUIStore(dbPath, analyticsDir)
if err != nil {
return fmt.Errorf("open database: %w", err)
return err
}
defer func() { _ = s.Close() }()

// Ensure schema is up to date
if err := s.InitSchema(); err != nil {
return fmt.Errorf("init schema: %w", err)
}
if err := runStartupMigrations(s); err != nil {
return fmt.Errorf("startup migrations: %w", err)
}

// Build FTS index in background — TUI uses DuckDB/Parquet for
// aggregates and only needs FTS for deep search (Tab to switch).
if s.NeedsFTSBackfill() {
go func() {
_, _ = s.BackfillFTS(nil)
}()
}

analyticsDir := cfg.AnalyticsDir()

// The Parquet analytics cache is a SQLite → DuckDB ETL and
// has no meaning when the system of record is PostgreSQL —
// buildCache feeds the DSN to the SQLite driver and
Expand All @@ -114,25 +98,10 @@ Remote Mode:
if s.IsPostgreSQL() {
engine = query.NewEngine(s.DB(), true)
} else {
// Check if cache needs to be built/updated (unless forcing SQL or skipping)
if !forceSQL && !skipCacheBuild {
staleness := cacheNeedsBuild(dbPath, analyticsDir)
if staleness.NeedsBuild {
fmt.Printf("Building analytics cache (%s)...\n", staleness.Reason)
result, err := buildCache(dbPath, analyticsDir, staleness.FullRebuild)
if err != nil {
fmt.Fprintf(os.Stderr, "Warning: Failed to build cache: %v\n", err)
fmt.Fprintf(os.Stderr, "Falling back to SQLite (may be slow for large archives)\n")
} else if !result.Skipped {
fmt.Printf("Cached %d messages for fast queries.\n", result.ExportedCount)
}
}
}

// Determine query engine to use
if !forceSQL && query.HasCompleteParquetData(analyticsDir) {
// Use DuckDB for fast Parquet queries
var duckOpts query.DuckDBOptions
duckOpts := tuiDuckDBOptions()
if noSQLiteScanner {
duckOpts.DisableSQLiteScanner = true
}
Expand All @@ -154,6 +123,14 @@ Remote Mode:
engine = query.NewEngine(s.DB(), false)
}
}

// Build FTS index in background after cache/engine startup. The
// aggregate TUI path uses Parquet; FTS is only needed for deep search.
if s.NeedsFTSBackfill() {
go func() {
_, _ = s.BackfillFTS(nil)
}()
}
}

// Check if engine supports text queries
Expand Down Expand Up @@ -191,6 +168,51 @@ Remote Mode:
},
}

// openLocalTUIStore returns a migrated store for a local TUI session,
// refreshing the analytics cache at analyticsDir first when applicable.
//
// The database at dbPath is opened and migrated up front, so any cache build
// that follows runs against a migrated schema. If the store is
// PostgreSQL-backed, or either the forceSQL or skipCacheBuild flag is set,
// that first handle is returned unchanged. Otherwise the handle is closed,
// the cache is rebuilt if cacheNeedsBuild reports it stale, and a freshly
// opened (and again migrated) store is returned.
//
// A cache build failure is not fatal: a warning is printed to stderr and the
// store is still reopened. Errors are returned only when opening/migrating
// the database or closing the first handle fails.
func openLocalTUIStore(dbPath, analyticsDir string) (*store.Store, error) {
	migrated, err := openMigratedLocalStore(dbPath)
	if err != nil {
		return nil, err
	}

	// Cache maintenance only applies to non-PostgreSQL stores, and only when
	// the user has neither forced SQL nor asked to skip the build.
	cacheEligible := !migrated.IsPostgreSQL() && !forceSQL && !skipCacheBuild
	if !cacheEligible {
		return migrated, nil
	}

	// Release the handle before the cache build runs.
	// NOTE(review): presumably this keeps the build from contending with an
	// open connection to the same file — confirm.
	if closeErr := migrated.Close(); closeErr != nil {
		return nil, fmt.Errorf("close database before cache build: %w", closeErr)
	}

	if staleness := cacheNeedsBuild(dbPath, analyticsDir); staleness.NeedsBuild {
		fmt.Printf("Building analytics cache (%s)...\n", staleness.Reason)
		result, buildErr := buildCache(dbPath, analyticsDir, staleness.FullRebuild)
		switch {
		case buildErr != nil:
			// Best effort: report the failure and carry on without the cache.
			fmt.Fprintf(os.Stderr, "Warning: Failed to build cache: %v\n", buildErr)
			fmt.Fprintf(os.Stderr, "Falling back to SQLite (may be slow for large archives)\n")
		case !result.Skipped:
			fmt.Printf("Cached %d messages for fast queries.\n", result.ExportedCount)
		}
	}

	// Hand the caller a fresh handle now that the build step is finished.
	return openMigratedLocalStore(dbPath)
}

// openMigratedLocalStore opens the database at dbPath and brings it up to
// date by running InitSchema followed by runStartupMigrations.
//
// On success the caller owns the returned store and is responsible for
// closing it. If either preparation step fails, the store is closed (the
// Close error is discarded in favor of the preparation error) and a wrapped
// error naming the failed step is returned.
func openMigratedLocalStore(dbPath string) (*store.Store, error) {
	opened, err := store.Open(dbPath)
	if err != nil {
		return nil, fmt.Errorf("open database: %w", err)
	}

	// Run both preparation steps in order, stopping at the first failure so
	// a single cleanup path below can handle either one.
	prepare := func() error {
		if schemaErr := opened.InitSchema(); schemaErr != nil {
			return fmt.Errorf("init schema: %w", schemaErr)
		}
		if migrateErr := runStartupMigrations(opened); migrateErr != nil {
			return fmt.Errorf("startup migrations: %w", migrateErr)
		}
		return nil
	}

	if prepErr := prepare(); prepErr != nil {
		_ = opened.Close()
		return nil, prepErr
	}
	return opened, nil
}

// cacheStaleness describes why the analytics cache needs a rebuild.
type cacheStaleness struct {
NeedsBuild bool
Expand All @@ -201,6 +223,10 @@ type cacheStaleness struct {
Reason string
}

// tuiDuckDBOptions returns the DuckDB options the TUI starts from: the zero
// value of query.DuckDBOptions with DisableSQLiteScanner turned on.
func tuiDuckDBOptions() query.DuckDBOptions {
	var opts query.DuckDBOptions
	opts.DisableSQLiteScanner = true
	return opts
}

// cacheNeedsBuild checks if the analytics cache needs to be built or
// updated. Collects all staleness signals before returning so that
// e.g. a mixed add+delete sync correctly reports both.
Expand Down
175 changes: 175 additions & 0 deletions cmd/msgvault/cmd/tui_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
package cmd

import (
"database/sql"
"path/filepath"
"testing"

_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.kenn.io/msgvault/internal/config"
"go.kenn.io/msgvault/internal/query"
)

// TestTUIDuckDBOptionsDisableSQLiteScannerByDefault checks that the options
// returned by tuiDuckDBOptions have DisableSQLiteScanner set.
func TestTUIDuckDBOptionsDisableSQLiteScannerByDefault(t *testing.T) {
	scannerDisabled := tuiDuckDBOptions().DisableSQLiteScanner

	assert.True(t, scannerDisabled,
		"TUI must not attach the live SQLite DB through DuckDB sqlite_scanner")
}

// TestOpenLocalTUIStoreMigratesBeforeCacheBuild opens a legacy-schema SQLite
// fixture through openLocalTUIStore with cache building enabled and asserts
// that complete Parquet data exists in the analytics directory afterwards.
func TestOpenLocalTUIStoreMigratesBeforeCacheBuild(t *testing.T) {
	// Snapshot the package-level state this test overrides and put it back
	// when the test finishes.
	prevCfg, prevForceSQL, prevSkipCacheBuild := cfg, forceSQL, skipCacheBuild
	t.Cleanup(func() {
		cfg, forceSQL, skipCacheBuild = prevCfg, prevForceSQL, prevSkipCacheBuild
	})

	// Empty config, with neither flag that would bypass the cache build.
	cfg, forceSQL, skipCacheBuild = &config.Config{}, false, false

	root := t.TempDir()
	var (
		dbPath       = filepath.Join(root, "legacy.db")
		analyticsDir = filepath.Join(root, "analytics")
	)
	setupLegacyTUIDBMissingDeletedAt(t, dbPath)

	opened, err := openLocalTUIStore(dbPath, analyticsDir)
	require.NoError(t, err, "openLocalTUIStore")
	t.Cleanup(func() { _ = opened.Close() })

	cacheComplete := query.HasCompleteParquetData(analyticsDir)
	assert.True(t, cacheComplete,
		"TUI startup should build cache after legacy columns are migrated")
}

// setupLegacyTUIDBMissingDeletedAt creates a SQLite database at dbPath with a
// hand-written schema and a small seed data set, for use as a test fixture.
//
// The schema defines the sources, conversations, participants, messages,
// message_recipients, labels, message_labels and attachments tables. The seed
// data is one source, one conversation, two participants, one message with a
// "from" and a "to" recipient, and one label attached to that message.
//
// NOTE(review): going by the function name, this schema is presumably missing
// a deleted-at column that the current schema has, so that opening it
// exercises the startup migrations — confirm which column against the
// store's schema definition.
//
// Any failure aborts the calling test via require.
func setupLegacyTUIDBMissingDeletedAt(t *testing.T, dbPath string) {
t.Helper()
db, err := sql.Open("sqlite3", dbPath)
require.NoError(t, err, "open sqlite")
// The connection is only needed while seeding; its close error is ignored.
defer func() { _ = db.Close() }()

// Create every table and insert the seed rows in one multi-statement Exec.
_, err = db.Exec(`
CREATE TABLE sources (
id INTEGER PRIMARY KEY,
source_type TEXT NOT NULL DEFAULT 'gmail',
identifier TEXT NOT NULL,
display_name TEXT,
google_user_id TEXT UNIQUE,
last_sync_at DATETIME,
sync_cursor TEXT,
sync_config JSON,
oauth_app TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE(source_type, identifier)
);
CREATE TABLE conversations (
id INTEGER PRIMARY KEY,
source_id INTEGER NOT NULL,
source_conversation_id TEXT,
conversation_type TEXT NOT NULL DEFAULT 'email_thread',
title TEXT,
participant_count INTEGER DEFAULT 0,
message_count INTEGER DEFAULT 0,
unread_count INTEGER DEFAULT 0,
last_message_at DATETIME,
last_message_preview TEXT,
metadata JSON,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE participants (
id INTEGER PRIMARY KEY,
email_address TEXT,
phone_number TEXT,
display_name TEXT,
domain TEXT,
canonical_id TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE messages (
id INTEGER PRIMARY KEY,
source_id INTEGER NOT NULL,
source_message_id TEXT NOT NULL,
conversation_id INTEGER,
subject TEXT,
snippet TEXT,
sent_at DATETIME,
received_at DATETIME,
internal_date DATETIME,
size_estimate INTEGER,
has_attachments BOOLEAN DEFAULT FALSE,
is_from_me BOOLEAN DEFAULT FALSE,
archived_at DATETIME,
rfc822_message_id TEXT,
sender_id INTEGER,
message_type TEXT NOT NULL DEFAULT 'email',
attachment_count INTEGER DEFAULT 0,
deleted_from_source_at DATETIME,
UNIQUE(source_id, source_message_id)
);
CREATE TABLE message_recipients (
id INTEGER PRIMARY KEY,
message_id INTEGER NOT NULL,
participant_id INTEGER NOT NULL,
recipient_type TEXT NOT NULL,
display_name TEXT
);
CREATE TABLE labels (
id INTEGER PRIMARY KEY,
source_id INTEGER NOT NULL,
source_label_id TEXT,
name TEXT NOT NULL,
label_type TEXT
);
CREATE TABLE message_labels (
message_id INTEGER NOT NULL,
label_id INTEGER NOT NULL,
PRIMARY KEY (message_id, label_id)
);
CREATE TABLE attachments (
id INTEGER PRIMARY KEY,
message_id INTEGER NOT NULL,
filename TEXT,
mime_type TEXT,
size INTEGER,
content_hash TEXT,
storage_path TEXT NOT NULL DEFAULT '',
media_type TEXT,
width INTEGER,
height INTEGER,
duration_ms INTEGER,
thumbnail_hash TEXT,
thumbnail_path TEXT,
source_attachment_id TEXT,
attachment_metadata JSON,
encryption_version INTEGER DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

INSERT INTO sources (id, source_type, identifier, display_name)
VALUES (1, 'gmail', 'test@example.com', 'Test Account');
INSERT INTO conversations (id, source_id, source_conversation_id, title)
VALUES (1, 1, 'thread-1', 'Legacy Thread');
INSERT INTO participants (id, email_address, domain, display_name)
VALUES (1, 'alice@example.com', 'example.com', 'Alice'),
(2, 'bob@example.com', 'example.com', 'Bob');
INSERT INTO messages (
id, source_id, source_message_id, conversation_id, subject, snippet,
sent_at, size_estimate, has_attachments
) VALUES (
1, 1, 'msg-1', 1, 'Legacy message', 'Preview',
'2024-01-02 03:04:05', 1234, 0
);
INSERT INTO message_recipients (message_id, participant_id, recipient_type, display_name)
VALUES (1, 1, 'from', 'Alice'), (1, 2, 'to', 'Bob');
INSERT INTO labels (id, source_id, source_label_id, name, label_type)
VALUES (1, 1, 'INBOX', 'INBOX', 'system');
INSERT INTO message_labels (message_id, label_id) VALUES (1, 1);
`)
require.NoError(t, err, "create legacy TUI fixture")
}