diff --git a/internal/search/parser.go b/internal/search/parser.go index ef28b831f..631d00d4f 100644 --- a/internal/search/parser.go +++ b/internal/search/parser.go @@ -124,7 +124,14 @@ var operators = map[string]operatorFn{ q.BccAddrs = append(q.BccAddrs, normalizeAddr(v)) }, "subject": func(q *Query, v string, _ time.Time) { - q.SubjectTerms = append(q.SubjectTerms, v) + // Drop empty/whitespace-only values (e.g. `subject:` or `subject:""`). + // Otherwise the store builds `LOWER(subject) LIKE '%%'`, which matches + // every message instead of being a no-op. Mirrors the label handlers. + // Non-empty punctuation (e.g. `subject:"!!!"`) is a valid literal + // substring search and is preserved. + if v = strings.TrimSpace(v); v != "" { + q.SubjectTerms = append(q.SubjectTerms, v) + } }, "label": func(q *Query, v string, _ time.Time) { if v = strings.TrimSpace(v); v != "" { diff --git a/internal/search/parser_test.go b/internal/search/parser_test.go index 8b69f2686..67afb604e 100644 --- a/internal/search/parser_test.go +++ b/internal/search/parser_test.go @@ -94,6 +94,21 @@ func TestParse(t *testing.T) { query: `from:"alice@example.com"`, want: Query{FromAddrs: []string{"alice@example.com"}}, }, + { + name: "empty subject value is dropped", + query: `subject:""`, + want: Query{}, + }, + { + name: "bare subject operator with no value is dropped", + query: `subject:`, + want: Query{}, + }, + { + name: "punctuation-only subject value is preserved", + query: `subject:"!!!"`, + want: Query{SubjectTerms: []string{"!!!"}}, + }, }, }, { diff --git a/internal/store/api.go b/internal/store/api.go index c71dd14e9..02b63e3a1 100644 --- a/internal/store/api.go +++ b/internal/store/api.go @@ -375,11 +375,26 @@ func (s *Store) searchMessagesQueryImpl( // Match each term against subject OR snippet so the no-FTS // path catches snippet hits, not just subjects. Per CLAUDE.md, // search queries never scan message_bodies. + added := 0 for _, term := range q.TextTerms { + // Skip terms with no searchable token (empty string, + // punctuation-only). hasFTSToken is the same predicate the + // FTS path uses via BuildFTSArg, so both paths agree on what + // is "tokenless". Without this, term=="" becomes LIKE '%%' + // and matches every message instead of nothing. + if !hasFTSToken(term) { + continue + } like := "%" + escapeLike(strings.ToLower(term)) + "%" conditions = append(conditions, `(LOWER(m.subject) LIKE ? ESCAPE '\' OR LOWER(m.snippet) LIKE ? ESCAPE '\')`) args = append(args, like, like) + added++ + } + if added == 0 { + // All terms were tokenless: substitute FALSE so the LIKE + // fallback returns zero rows, matching the FTS path. + conditions = append(conditions, "FALSE") } } @@ -453,6 +468,13 @@ func (s *Store) searchMessagesQueryImpl( // against "Invoice from acme" on PG. Every other LIKE in this // function already wraps with LOWER. for _, term := range q.SubjectTerms { + // An empty subject term would build LIKE '%%' and match every + // message; skip it. (The parser already drops empties; this guards + // directly-constructed queries.) Punctuation-only terms are kept — + // the subject filter is a literal substring match, not FTS. + if strings.TrimSpace(term) == "" { + continue + } conditions = append(conditions, `LOWER(m.subject) LIKE LOWER(?) ESCAPE '\'`) args = append(args, "%"+escapeLike(strings.ToLower(term))+"%") diff --git a/internal/store/api_search_nofts_test.go b/internal/store/api_search_nofts_test.go new file mode 100644 index 000000000..ad3641efa --- /dev/null +++ b/internal/store/api_search_nofts_test.go @@ -0,0 +1,66 @@ +package store + +import ( + "database/sql" + "fmt" + "testing" + + assertpkg "github.com/stretchr/testify/assert" + requirepkg "github.com/stretchr/testify/require" + + "go.kenn.io/msgvault/internal/search" +) + +// TestSearchMessagesQueryImpl_NoFTS_TokenlessTerms guards the LIKE fallback +// path (ftsAvailable=false), which is reached at runtime when FTS errors or +// when the binary is built without the fts5 tag. A text term that reduces to no +// searchable tokens (empty string, punctuation-only) must yield zero rows via a +// FALSE predicate — never "LOWER(...) LIKE '%%'", which matches every message. +// This mirrors the FTS path's tokenless handling. It forces the no-FTS branch +// directly, so it runs regardless of the fts5 build tag. +func TestSearchMessagesQueryImpl_NoFTS_TokenlessTerms(t *testing.T) { + require := requirepkg.New(t) + st := openTestStore(t) + src, err := st.GetOrCreateSource("gmail", "noftstokenless@example.com") + require.NoError(err, "GetOrCreateSource") + convID, err := st.EnsureConversation(src.ID, "thread-nofts", "Thread NoFTS") + require.NoError(err, "EnsureConversation") + + for i, sub := range []string{"invoice attached", "project update"} { + _, err := st.UpsertMessage(&Message{ + ConversationID: convID, + SourceID: src.ID, + SourceMessageID: fmt.Sprintf("nofts-msg-%d", i), + MessageType: "email", + Subject: sql.NullString{String: sub, Valid: true}, + Snippet: sql.NullString{String: "weekly snippet", Valid: true}, + SizeEstimate: 100, + }) + require.NoError(err, "UpsertMessage %d", i) + } + + // Baseline: a real term still matches via LIKE, proving the setup is wired. + _, total, err := st.searchMessagesQueryImpl( + &search.Query{TextTerms: []string{"invoice"}}, 0, 50, false) + require.NoError(err, "baseline LIKE search") + require.GreaterOrEqual(total, int64(1), "baseline LIKE term must match") + + cases := []struct { + name string + terms []string + }{ + {"empty_string", []string{""}}, + {"only_punctuation", []string{"!!!"}}, + {"only_dashes", []string{"---"}}, + {"mixed_all_empty", []string{"!!!", "---", ""}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + msgs, total, err := st.searchMessagesQueryImpl( + &search.Query{TextTerms: tc.terms}, 0, 50, false) + requirepkg.NoError(t, err, "searchMessagesQueryImpl(%v)", tc.terms) + assertpkg.Equal(t, int64(0), total, "tokenless terms must match nothing on the LIKE path") + assertpkg.Empty(t, msgs) + }) + } +}