Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion internal/search/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,14 @@ var operators = map[string]operatorFn{
q.BccAddrs = append(q.BccAddrs, normalizeAddr(v))
},
"subject": func(q *Query, v string, _ time.Time) {
q.SubjectTerms = append(q.SubjectTerms, v)
// Drop empty/whitespace-only values (e.g. `subject:` or `subject:""`).
// Otherwise the store builds `LOWER(subject) LIKE '%%'`, which matches
// every message instead of being a no-op. Mirrors the label handlers.
// Non-empty punctuation (e.g. `subject:"!!!"`) is a valid literal
// substring search and is preserved.
if v = strings.TrimSpace(v); v != "" {
q.SubjectTerms = append(q.SubjectTerms, v)
}
},
"label": func(q *Query, v string, _ time.Time) {
if v = strings.TrimSpace(v); v != "" {
Expand Down
15 changes: 15 additions & 0 deletions internal/search/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,21 @@ func TestParse(t *testing.T) {
query: `from:"alice@example.com"`,
want: Query{FromAddrs: []string{"alice@example.com"}},
},
{
name: "empty subject value is dropped",
query: `subject:""`,
want: Query{},
},
{
name: "bare subject operator with no value is dropped",
query: `subject:`,
want: Query{},
},
{
name: "punctuation-only subject value is preserved",
query: `subject:"!!!"`,
want: Query{SubjectTerms: []string{"!!!"}},
},
},
},
{
Expand Down
22 changes: 22 additions & 0 deletions internal/store/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,11 +375,26 @@ func (s *Store) searchMessagesQueryImpl(
// Match each term against subject OR snippet so the no-FTS
// path catches snippet hits, not just subjects. Per CLAUDE.md,
// search queries never scan message_bodies.
added := 0
for _, term := range q.TextTerms {
// Skip terms with no searchable token (empty string,
// punctuation-only). hasFTSToken is the same predicate the
// FTS path uses via BuildFTSArg, so both paths agree on what
// is "tokenless". Without this, term=="" becomes LIKE '%%'
// and matches every message instead of nothing.
if !hasFTSToken(term) {
continue
}
like := "%" + escapeLike(strings.ToLower(term)) + "%"
conditions = append(conditions,
`(LOWER(m.subject) LIKE ? ESCAPE '\' OR LOWER(m.snippet) LIKE ? ESCAPE '\')`)
args = append(args, like, like)
added++
}
if added == 0 {
// All terms were tokenless: substitute FALSE so the LIKE
// fallback returns zero rows, matching the FTS path.
conditions = append(conditions, "FALSE")
}
}

Expand Down Expand Up @@ -453,6 +468,13 @@ func (s *Store) searchMessagesQueryImpl(
// against "Invoice from acme" on PG. Every other LIKE in this
// function already wraps with LOWER.
for _, term := range q.SubjectTerms {
// An empty subject term would build LIKE '%%' and match every
// message; skip it. (The parser already drops empties; this guards
// directly-constructed queries.) Punctuation-only terms are kept —
// the subject filter is a literal substring match, not FTS.
if strings.TrimSpace(term) == "" {
continue
}
conditions = append(conditions,
`LOWER(m.subject) LIKE LOWER(?) ESCAPE '\'`)
args = append(args, "%"+escapeLike(strings.ToLower(term))+"%")
Expand Down
66 changes: 66 additions & 0 deletions internal/store/api_search_nofts_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package store

import (
"database/sql"
"fmt"
"testing"

assertpkg "github.com/stretchr/testify/assert"
requirepkg "github.com/stretchr/testify/require"

"go.kenn.io/msgvault/internal/search"
)

// TestSearchMessagesQueryImpl_NoFTS_TokenlessTerms covers the LIKE fallback of
// searchMessagesQueryImpl by calling it with ftsAvailable=false, so the test
// runs regardless of the fts5 build tag. That fallback is what executes at
// runtime when FTS errors or when the binary is built without fts5.
//
// Text terms that contain no searchable token (the empty string, or nothing
// but punctuation) must produce zero rows through a FALSE predicate. They must
// never degrade into "LOWER(...) LIKE '%%'", which would match every message.
// The expectation mirrors how the FTS path treats tokenless terms.
func TestSearchMessagesQueryImpl_NoFTS_TokenlessTerms(t *testing.T) {
	require := requirepkg.New(t)

	// Fixture: one source, one conversation, two messages sharing a snippet.
	st := openTestStore(t)
	source, err := st.GetOrCreateSource("gmail", "noftstokenless@example.com")
	require.NoError(err, "GetOrCreateSource")
	conversationID, err := st.EnsureConversation(source.ID, "thread-nofts", "Thread NoFTS")
	require.NoError(err, "EnsureConversation")

	subjects := []string{"invoice attached", "project update"}
	for idx := range subjects {
		msg := &Message{
			SourceID:        source.ID,
			ConversationID:  conversationID,
			SourceMessageID: fmt.Sprintf("nofts-msg-%d", idx),
			MessageType:     "email",
			Subject:         sql.NullString{String: subjects[idx], Valid: true},
			Snippet:         sql.NullString{String: "weekly snippet", Valid: true},
			SizeEstimate:    100,
		}
		_, upsertErr := st.UpsertMessage(msg)
		require.NoError(upsertErr, "UpsertMessage %d", idx)
	}

	// Sanity check: an ordinary term has to hit through LIKE, otherwise the
	// zero-row assertions below would pass vacuously.
	baseline := &search.Query{TextTerms: []string{"invoice"}}
	_, matched, err := st.searchMessagesQueryImpl(baseline, 0, 50, false)
	require.NoError(err, "baseline LIKE search")
	require.GreaterOrEqual(matched, int64(1), "baseline LIKE term must match")

	tokenless := []struct {
		name  string
		terms []string
	}{
		{name: "empty_string", terms: []string{""}},
		{name: "only_punctuation", terms: []string{"!!!"}},
		{name: "only_dashes", terms: []string{"---"}},
		{name: "mixed_all_empty", terms: []string{"!!!", "---", ""}},
	}
	for _, tt := range tokenless {
		t.Run(tt.name, func(t *testing.T) {
			assert := assertpkg.New(t)

			query := &search.Query{TextTerms: tt.terms}
			rows, count, err := st.searchMessagesQueryImpl(query, 0, 50, false)
			requirepkg.New(t).NoError(err, "searchMessagesQueryImpl(%v)", tt.terms)

			assert.Equal(int64(0), count, "tokenless terms must match nothing on the LIKE path")
			assert.Empty(rows)
		})
	}
}