diff --git a/internal/store/members_profile.go b/internal/store/members_profile.go index 7a0c789..3871e32 100644 --- a/internal/store/members_profile.go +++ b/internal/store/members_profile.go @@ -56,6 +56,7 @@ func (s *Store) rebuildMemberFTS(ctx context.Context) error { if _, err := tx.ExecContext(ctx, `drop table if exists member_fts`); err != nil { return fmt.Errorf("drop member_fts: %w", err) } + // Uses FTS5 default unicode61 tokenizer; query input is normalized by normalizeFTSQuery for operator literalization. if _, err := tx.ExecContext(ctx, ` create virtual table member_fts using fts5( member_key unindexed, diff --git a/internal/store/store.go b/internal/store/store.go index 7da630d..75b4c2c 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -391,6 +391,7 @@ func (s *Store) applyBaselineSchema(ctx context.Context) error { embedded_at text not null, primary key (message_id, provider, model, input_version) );`, + // Uses FTS5 default unicode61 tokenizer; query input is normalized by normalizeFTSQuery for operator literalization. `create virtual table if not exists message_fts using fts5( message_id unindexed, guild_id unindexed, @@ -571,6 +572,7 @@ func (s *Store) rebuildFTS(ctx context.Context) error { if _, err := tx.ExecContext(ctx, `drop table if exists message_fts`); err != nil { return fmt.Errorf("drop message_fts: %w", err) } + // Uses FTS5 default unicode61 tokenizer; query input is normalized by normalizeFTSQuery for operator literalization. 
if _, err := tx.ExecContext(ctx, `
 		create virtual table message_fts using fts5(
 			message_id unindexed,
diff --git a/internal/store/store_test.go b/internal/store/store_test.go
index 5f185c6..3fce402 100644
--- a/internal/store/store_test.go
+++ b/internal/store/store_test.go
@@ -1662,3 +1662,92 @@ func TestListMessagesFiltersAndLimit(t *testing.T) {
 	require.Equal(t, "m2", rows[0].MessageID)
 	require.Equal(t, "m4", rows[1].MessageID)
 }
+
+// TestNormalizeFTSQueryEdgeCases pins normalizeFTSQuery output for a table of raw inputs.
+func TestNormalizeFTSQueryEdgeCases(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct{ name, raw, want string }{
+		{name: "empty", raw: "", want: ""},
+		{name: "whitespace-only", raw: " \t \n ", want: ""},
+		{name: "single-word", raw: "needle", want: `"needle"`},
+		{name: "multi-word", raw: "needle haystack", want: `"needle" "haystack"`},
+		{name: "operators-as-terms", raw: "AND OR NOT NEAR", want: `"AND" "OR" "NOT" "NEAR"`},
+		{name: "embedded-double-quote", raw: `say"hi`, want: `"say hi"`}, // inner quote is expected to come back as a space
+		{name: "asterisk-literal", raw: "panic*", want: `"panic*"`},
+		{name: "mixed-punctuation", raw: "alpha,(beta):gamma", want: `"alpha,(beta):gamma"`},
+		{name: "unicode", raw: "café 東京", want: `"café" "東京"`},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			require.Equal(t, tt.want, normalizeFTSQuery(tt.raw))
+		})
+	}
+}
+
+// TestSearchMessagesTreatsFTSOperatorsAsLiterals expects the query "AND" to return m1 and m2 but not m3.
+func TestSearchMessagesTreatsFTSOperatorsAsLiterals(t *testing.T) {
+	t.Parallel()
+
+	ctx := context.Background()
+	db, err := Open(ctx, filepath.Join(t.TempDir(), "discrawl.db"))
+	require.NoError(t, err)
+	defer func() { _ = db.Close() }()
+
+	channel := ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}
+	require.NoError(t, db.UpsertChannel(ctx, channel))
+
+	fixtures := []MessageRecord{
+		{
+			ID:                "m1",
+			GuildID:           "g1",
+			ChannelID:         "c1",
+			ChannelName:       "general",
+			AuthorID:          "u1",
+			AuthorName:        "Peter",
+			CreatedAt:         "2026-04-25T12:00:00Z",
+			Content:           "AND",
+			NormalizedContent: "AND",
+			RawJSON:           `{"author":{"username":"Peter"}}`,
+		},
+		{
+			ID:                "m2",
+			GuildID:           "g1",
+			ChannelID:         "c1",
+			ChannelName:       "general",
+			AuthorID:          "u2",
+			AuthorName:        "Other",
+			CreatedAt:         "2026-04-25T12:01:00Z",
+			Content:           "alpha and beta",
+			NormalizedContent: "alpha and beta",
+			RawJSON:           `{"author":{"username":"Other"}}`,
+		},
+		{
+			ID:                "m3",
+			GuildID:           "g1",
+			ChannelID:         "c1",
+			ChannelName:       "general",
+			AuthorID:          "u3",
+			AuthorName:        "Another",
+			CreatedAt:         "2026-04-25T12:02:00Z",
+			Content:           "alpha beta",
+			NormalizedContent: "alpha beta",
+			RawJSON:           `{"author":{"username":"Another"}}`,
+		},
+	}
+	for _, msg := range fixtures {
+		require.NoError(t, db.UpsertMessage(ctx, msg))
+	}
+
+	hits, err := db.SearchMessages(ctx, SearchOptions{Query: "AND", Limit: 10})
+	require.NoError(t, err)
+	require.Len(t, hits, 2)
+
+	gotIDs := make([]string, 0, len(hits))
+	for _, hit := range hits {
+		gotIDs = append(gotIDs, hit.MessageID)
+	}
+	require.ElementsMatch(t, []string{"m1", "m2"}, gotIDs)
+}