From b5f428628d7ca87890f8f6280a3a420ba2fae119 Mon Sep 17 00:00:00 2001
From: Pi
Date: Wed, 18 Mar 2026 12:20:25 +0000
Subject: [PATCH] fix: handle hyphenated and underscore terms in lex and vec/hyde queries

Preserve hyphens and underscores in sanitizeFTS5Term so FTS5's
unicode61 tokenizer can split them symmetrically at query time,
producing precise phrase matches. Also fix validateSemanticQuery
false positive that rejected hyphenated terms like DEC-0054 as
negation syntax in vec/hyde queries.

Complements #404 (underscore-only fix) by also covering hyphens.

Refs: #305, #417
---
 src/store.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/store.ts b/src/store.ts
index f17404d8..1a00e1ff 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -2651,7 +2651,7 @@ export function getTopLevelPathsWithoutContext(db: Database, collectionName: str
 // =============================================================================
 
 function sanitizeFTS5Term(term: string): string {
-  return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase();
+  return term.replace(/[^\p{L}\p{N}'_-]/gu, '').toLowerCase();
 }
 
 /**
@@ -2743,7 +2743,7 @@ function buildFTS5Query(query: string): string | null {
  */
 export function validateSemanticQuery(query: string): string | null {
   // Check for negation syntax
-  if (/-\w/.test(query) || /-"/.test(query)) {
+  if (/(?:^|\s)-[\w"]/.test(query)) {
     return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
   }
   return null;