Skip to content

Commit 1f0d882

Browse files
add configurable minimum prefix length for search queries
This feature addresses the issue of overly broad search matches from very short prefixes by introducing a configurable minimum length requirement for prefix-based searches.

Changes:
- Added 'min_prefix_length' configuration option (default: 3)
- Updated Tokens::prefixes() to accept minLength parameter
- Modified HasEncryptedSearchIndex to enforce minimum length during:
  - Token generation (indexing)
  - Query execution (searching)
- Added comprehensive test coverage (10 new feature tests, 6 unit tests)

Behavior:
- With min_prefix_length=3 (default):
  - Searching for "Wi" (2 chars) returns no results
  - Searching for "Wil" (3+ chars) works normally
- Prevents performance issues from single-character searches
- Reduces false positives from very short search terms
- Exact search is unaffected by this setting

Benefits:
- Eliminates unwanted matches (e.g., "W" matching "William", "Wendy", "Walter")
- Improves search precision
- Maintains backwards compatibility (set to 1 for old behavior)
- Configurable per environment via ENCRYPTED_SEARCH_MIN_PREFIX

Test updates:
- Updated existing tests to use min_prefix_length=1 for compatibility
- Added MinimumPrefixLengthTest with 10 comprehensive scenarios
- Added 6 unit tests for Tokens class minimum length behavior
- All 76 tests passing (136 assertions)
1 parent 19b5f5a commit 1f0d882

File tree

8 files changed

+433
-18
lines changed

8 files changed

+433
-18
lines changed

config/encrypted-search.php

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
|--------------------------------------------------------------------------
3737
|
3838
| The maximum number of prefix levels to generate for prefix-based search.
39-
| For example, the term wietse would generate:
39+
| For example, the term "wietse" would generate:
4040
| ["w", "wi", "wie", "wiet", "wiets", "wietse"]
4141
|
4242
| Increasing this value improves search precision for short terms, but
@@ -45,6 +45,29 @@
4545
*/
4646
'max_prefix_depth' => 6,
4747

48+
/*
49+
|--------------------------------------------------------------------------
50+
| Minimum Prefix Length
51+
|--------------------------------------------------------------------------
52+
|
53+
| The minimum number of characters required for prefix-based searches.
54+
| This prevents overly broad matches from very short search terms.
55+
|
56+
| For example, with min_prefix_length = 3:
57+
| - Searching for "Wi" (2 chars) will return no results
58+
| - Searching for "Wil" (3 chars) will work normally
59+
|
60+
| This helps prevent performance issues and reduces false positives
61+
| when users search for very short terms like "a" or "de".
62+
|
63+
| Recommended values:
64+
| - 2: Allow two-character searches (more flexible, more false positives)
65+
| - 3: Require three characters (good balance)
66+
| - 4: Require four characters (very precise, less flexible)
67+
|
68+
*/
69+
'min_prefix_length' => env('ENCRYPTED_SEARCH_MIN_PREFIX', 3),
70+
4871
/*
4972
|--------------------------------------------------------------------------
5073
| Automatic Indexing of Encrypted Casts

src/Support/Tokens.php

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,21 +69,27 @@ public static function exact(string $normalized, string $pepper): string
6969
* These prefix hashes can be used to implement fast "starts-with"
7070
* queries while maintaining cryptographic privacy.
7171
*
72-
* Example: "alex" with maxDepth=3 yields tokens for "a", "al", "ale".
72+
* Only prefixes at or above $minLength are generated (callers pass the configured minimum).
73+
* This prevents overly broad matches from very short search terms.
74+
*
75+
* Example: "alex" with maxDepth=4, minLength=2 yields tokens for "al", "ale", "alex".
76+
* (skips "a" because it's below minimum length)
7377
*
7478
* @param string $normalized
7579
* The normalized (lowercase, diacritic-free) string.
7680
* @param int $maxDepth
7781
* The maximum number of prefix characters to hash.
7882
* @param string $pepper
7983
* A secret application-level random string from configuration.
84+
* @param int $minLength
85+
* The minimum prefix length to generate (default: 1 for backwards compatibility; values below 1 are treated as 1).
8086
*
8187
* @return string[]
8288
* An array of hex-encoded SHA-256 prefix tokens.
8389
*
8490
* @throws \RuntimeException if pepper is empty
8591
*/
86-
public static function prefixes(string $normalized, int $maxDepth, string $pepper): array
92+
public static function prefixes(string $normalized, int $maxDepth, string $pepper, int $minLength = 1): array
8793
{
8894
if (empty($pepper)) {
8995
throw new \RuntimeException(
@@ -96,7 +102,10 @@ public static function prefixes(string $normalized, int $maxDepth, string $peppe
96102
$len = mb_strlen($normalized, 'UTF-8');
97103
$depth = min($maxDepth, $len);
98104

99-
for ($i = 1; $i <= $depth; $i++) {
105+
// Start from minimum length instead of 1
106+
$start = max(1, $minLength);
107+
108+
for ($i = $start; $i <= $depth; $i++) {
100109
$prefix = mb_substr($normalized, 0, $i, 'UTF-8');
101110
$out[] = hash('sha256', $prefix . $pepper);
102111
}

src/Traits/HasEncryptedSearchIndex.php

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ public function updateSearchIndex(): void
7373

7474
$pepper = (string) config('encrypted-search.search_pepper', '');
7575
$max = (int) config('encrypted-search.max_prefix_depth', 6);
76+
$min = (int) config('encrypted-search.min_prefix_length', 1);
7677
$useElastic = config('encrypted-search.elasticsearch.enabled', false);
7778

7879
$rows = [];
@@ -108,7 +109,7 @@ public function updateSearchIndex(): void
108109

109110
// Generate prefix-based tokens
110111
if (!empty($modes['prefix'])) {
111-
foreach (Tokens::prefixes($normalized, $max, $pepper) as $token) {
112+
foreach (Tokens::prefixes($normalized, $max, $pepper, $min) as $token) {
112113
$rows[] = [
113114
'model_type' => static::class,
114115
'model_id' => $this->getKey(),
@@ -275,18 +276,30 @@ public function scopeEncryptedExact(Builder $query, string $field, string $term)
275276
public function scopeEncryptedPrefix(Builder $query, string $field, string $term): Builder
276277
{
277278
$pepper = (string) config('encrypted-search.search_pepper', '');
279+
$minLength = (int) config('encrypted-search.min_prefix_length', 1);
278280
$normalized = Normalizer::normalize($term);
279281

280282
if (!$normalized) {
281283
return $query->whereRaw('1=0');
282284
}
283285

286+
// Check if search term meets minimum length requirement
287+
if (mb_strlen($normalized, 'UTF-8') < $minLength) {
288+
return $query->whereRaw('1=0');
289+
}
290+
284291
$tokens = Tokens::prefixes(
285292
$normalized,
286293
(int) config('encrypted-search.max_prefix_depth', 6),
287-
$pepper
294+
$pepper,
295+
$minLength
288296
);
289297

298+
// If no tokens were generated (e.g. max_prefix_depth is lower than min_prefix_length), return no results
299+
if (empty($tokens)) {
300+
return $query->whereRaw('1=0');
301+
}
302+
290303
// Check if Elasticsearch is enabled
291304
if (config('encrypted-search.elasticsearch.enabled', false)) {
292305
$modelIds = $this->searchElasticsearch($field, $tokens, 'prefix');

tests/Feature/EncryptedSearchIntegrationTest.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ protected function setUp(): void
6767
// Disable Elasticsearch during tests (we test DB index)
6868
config()->set('encrypted-search.elasticsearch.enabled', false);
6969

70+
// Set minimum prefix length to 1 for backwards compatibility in basic tests
71+
config()->set('encrypted-search.min_prefix_length', 1);
72+
7073
// Ensure Eloquent events are active (boot model & dispatcher)
7174
\Illuminate\Database\Eloquent\Model::unsetEventDispatcher();
7275
\Illuminate\Database\Eloquent\Model::setEventDispatcher(app('events'));

tests/Feature/HasEncryptedSearchIndexEdgeCasesTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ protected function setUp(): void
4848

4949
config()->set('encrypted-search.elasticsearch.enabled', false);
5050
config()->set('encrypted-search.search_pepper', 'test-pepper-secret');
51+
config()->set('encrypted-search.min_prefix_length', 1);
5152

5253
\Illuminate\Database\Eloquent\Model::unsetEventDispatcher();
5354
\Illuminate\Database\Eloquent\Model::setEventDispatcher(app('events'));

0 commit comments

Comments
 (0)