From 2d5548240aa027c0e197df2eff9c9ae6794115cb Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 24 Mar 2025 16:28:14 +0000 Subject: [PATCH 1/8] Vectors: Built content vector indexing system --- app/Config/services.php | 10 +++ app/Search/SearchIndex.php | 14 +++- app/Search/Vectors/EntityVectorGenerator.php | 84 +++++++++++++++++++ app/Search/Vectors/SearchVector.php | 16 ++++ .../Services/OpenAiVectorQueryService.php | 36 ++++++++ .../Vectors/Services/VectorQueryService.php | 12 +++ app/Search/Vectors/StoreEntityVectorsJob.php | 28 +++++++ .../Vectors/VectorQueryServiceProvider.php | 38 +++++++++ ..._24_155748_create_search_vectors_table.php | 32 +++++++ 9 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 app/Search/Vectors/EntityVectorGenerator.php create mode 100644 app/Search/Vectors/SearchVector.php create mode 100644 app/Search/Vectors/Services/OpenAiVectorQueryService.php create mode 100644 app/Search/Vectors/Services/VectorQueryService.php create mode 100644 app/Search/Vectors/StoreEntityVectorsJob.php create mode 100644 app/Search/Vectors/VectorQueryServiceProvider.php create mode 100644 database/migrations/2025_03_24_155748_create_search_vectors_table.php diff --git a/app/Config/services.php b/app/Config/services.php index d7345823150..a34b243f07d 100644 --- a/app/Config/services.php +++ b/app/Config/services.php @@ -22,6 +22,16 @@ // Callback URL for social authentication methods 'callback_url' => env('APP_URL', false), + // LLM Service + // Options: openai + 'llm' => env('LLM_SERVICE', ''), + + // OpenAI API-compatible service details + 'openai' => [ + 'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'), + 'key' => env('OPENAI_KEY', ''), + ], + 'github' => [ 'client_id' => env('GITHUB_APP_ID', false), 'client_secret' => env('GITHUB_APP_SECRET', false), diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php index 844e3584b20..569fddc73f7 100644 --- a/app/Search/SearchIndex.php +++ b/app/Search/SearchIndex.php 
@@ -6,6 +6,8 @@ use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; +use BookStack\Search\Vectors\StoreEntityVectorsJob; +use BookStack\Search\Vectors\VectorQueryServiceProvider; use BookStack\Util\HtmlDocument; use DOMNode; use Illuminate\Database\Eloquent\Builder; @@ -25,7 +27,7 @@ class SearchIndex public static string $softDelimiters = ".-"; public function __construct( - protected EntityProvider $entityProvider + protected EntityProvider $entityProvider, ) { } @@ -37,6 +39,10 @@ public function indexEntity(Entity $entity): void $this->deleteEntityTerms($entity); $terms = $this->entityToTermDataArray($entity); $this->insertTerms($terms); + + if (VectorQueryServiceProvider::isEnabled()) { + dispatch(new StoreEntityVectorsJob($entity)); + } } /** @@ -47,9 +53,15 @@ public function indexEntity(Entity $entity): void public function indexEntities(array $entities): void { $terms = []; + $vectorQueryEnabled = VectorQueryServiceProvider::isEnabled(); + foreach ($entities as $entity) { $entityTerms = $this->entityToTermDataArray($entity); array_push($terms, ...$entityTerms); + + if ($vectorQueryEnabled) { + dispatch(new StoreEntityVectorsJob($entity)); + } } $this->insertTerms($terms); diff --git a/app/Search/Vectors/EntityVectorGenerator.php b/app/Search/Vectors/EntityVectorGenerator.php new file mode 100644 index 00000000000..8a49187736b --- /dev/null +++ b/app/Search/Vectors/EntityVectorGenerator.php @@ -0,0 +1,84 @@ +vectorQueryServiceProvider->get(); + + $text = $this->entityToPlainText($entity); + $chunks = $this->chunkText($text); + $embeddings = $this->chunksToEmbeddings($chunks, $vectorService); + + $this->deleteExistingEmbeddingsForEntity($entity); + $this->storeEmbeddings($embeddings, $chunks, $entity); + } + + protected function deleteExistingEmbeddingsForEntity(Entity $entity): void + { + SearchVector::query() + ->where('entity_type', '=', $entity->getMorphClass()) + ->where('entity_id', '=', 
$entity->id) + ->delete(); + } + + protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void + { + $toInsert = []; + + foreach ($embeddings as $index => $embedding) { + $text = $textChunks[$index]; + $toInsert[] = [ + 'entity_id' => $entity->id, + 'entity_type' => $entity->getMorphClass(), + 'embedding' => DB::raw('STRING_TO_VECTOR("[' . implode(',', $embedding) . ']")'), + 'text' => $text, + ]; + } + + // TODO - Chunk inserts + SearchVector::query()->insert($toInsert); + } + + /** + * @param string[] $chunks + * @return float[][] One embedding (float[]) per chunk, keyed by the chunk's index + */ + protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array + { + $embeddings = []; + foreach ($chunks as $index => $chunk) { + $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk); + } + return $embeddings; + } + + /** + * @return string[] + */ + protected function chunkText(string $text): array + { + // TODO - Join adjacent smaller chunks up + return array_filter(array_map(function (string $section): string { + return trim($section); + }, explode("\n", $text))); + } + + protected function entityToPlainText(Entity $entity): string + { + $text = $entity->name . "\n\n" . $entity->{$entity->textField}; + // TODO - Add tags + return $text; + } +} diff --git a/app/Search/Vectors/SearchVector.php b/app/Search/Vectors/SearchVector.php new file mode 100644 index 00000000000..4a5555f87d9 --- /dev/null +++ b/app/Search/Vectors/SearchVector.php @@ -0,0 +1,16 @@ +endpoint, '/') . '/' . ltrim($uri, '/'); + $client = $this->http->buildClient(10); + $request = $this->http->jsonRequest($method, $fullUrl, $data) + ->withHeader('Authorization', 'Bearer ' . 
$this->key); + + $response = $client->sendRequest($request); + return json_decode($response->getBody()->getContents(), true); + } + + public function generateEmbeddings(string $text): array + { + $response = $this->jsonRequest('POST', 'v1/embeddings', [ + 'input' => $text, + 'model' => 'text-embedding-3-small', + ]); + + return $response['data'][0]['embedding']; + } +} diff --git a/app/Search/Vectors/Services/VectorQueryService.php b/app/Search/Vectors/Services/VectorQueryService.php new file mode 100644 index 00000000000..2cc4ed0178f --- /dev/null +++ b/app/Search/Vectors/Services/VectorQueryService.php @@ -0,0 +1,12 @@ +generateAndStore($this->entity); + } +} diff --git a/app/Search/Vectors/VectorQueryServiceProvider.php b/app/Search/Vectors/VectorQueryServiceProvider.php new file mode 100644 index 00000000000..c700307e1f3 --- /dev/null +++ b/app/Search/Vectors/VectorQueryServiceProvider.php @@ -0,0 +1,38 @@ +getServiceName(); + + if ($service === 'openai') { + $key = config('services.openai.key'); + $endpoint = config('services.openai.endpoint'); + return new OpenAiVectorQueryService($endpoint, $key, $this->http); + } + + throw new \Exception("No '{$service}' LLM service found"); + } + + protected static function getServiceName(): string + { + return strtolower(config('services.llm')); + } + + public static function isEnabled(): bool + { + return !empty(static::getServiceName()); + } +} diff --git a/database/migrations/2025_03_24_155748_create_search_vectors_table.php b/database/migrations/2025_03_24_155748_create_search_vectors_table.php new file mode 100644 index 00000000000..d7fb0118a2f --- /dev/null +++ b/database/migrations/2025_03_24_155748_create_search_vectors_table.php @@ -0,0 +1,32 @@ +string('entity_type', 100); + $table->integer('entity_id'); + $table->text('text'); + $table->vector('embedding'); + + $table->index(['entity_type', 'entity_id']); + }); + } + + /** + * Reverse the migrations. 
+ */ + public function down(): void + { + Schema::dropIfExists('search_vectors'); + } +}; From b9ecf55e1fb773eba2820b31ab91f132770532a6 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 24 Mar 2025 19:51:48 +0000 Subject: [PATCH 2/8] Vectors: Got basic LLM querying working using vector search context --- app/Search/SearchController.php | 16 +++++++++ app/Search/Vectors/EntityVectorGenerator.php | 2 +- .../Services/OpenAiVectorQueryService.php | 21 ++++++++++++ .../Vectors/Services/VectorQueryService.php | 9 +++++ app/Search/Vectors/VectorSearchRunner.php | 33 +++++++++++++++++++ ..._24_155748_create_search_vectors_table.php | 5 ++- resources/views/search/query.blade.php | 29 ++++++++++++++++ routes/web.php | 1 + 8 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 app/Search/Vectors/VectorSearchRunner.php create mode 100644 resources/views/search/query.blade.php diff --git a/app/Search/SearchController.php b/app/Search/SearchController.php index 2fce6a3d53f..a688385e7c3 100644 --- a/app/Search/SearchController.php +++ b/app/Search/SearchController.php @@ -6,6 +6,7 @@ use BookStack\Entities\Queries\QueryPopular; use BookStack\Entities\Tools\SiblingFetcher; use BookStack\Http\Controller; +use BookStack\Search\Vectors\VectorSearchRunner; use Illuminate\Http\Request; class SearchController extends Controller @@ -139,4 +140,19 @@ public function searchSiblings(Request $request, SiblingFetcher $siblingFetcher) return view('entities.list-basic', ['entities' => $entities, 'style' => 'compact']); } + + public function searchQuery(Request $request, VectorSearchRunner $runner) + { + $query = $request->get('query', ''); + + if ($query) { + $results = $runner->run($query); + } else { + $results = null; + } + + return view('search.query', [ + 'results' => $results, + ]); + } } diff --git a/app/Search/Vectors/EntityVectorGenerator.php b/app/Search/Vectors/EntityVectorGenerator.php index 8a49187736b..9563694a321 100644 --- 
a/app/Search/Vectors/EntityVectorGenerator.php +++ b/app/Search/Vectors/EntityVectorGenerator.php @@ -42,7 +42,7 @@ protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $toInsert[] = [ 'entity_id' => $entity->id, 'entity_type' => $entity->getMorphClass(), - 'embedding' => DB::raw('STRING_TO_VECTOR("[' . implode(',', $embedding) . ']")'), + 'embedding' => DB::raw('VEC_FROMTEXT("[' . implode(',', $embedding) . ']")'), 'text' => $text, ]; } diff --git a/app/Search/Vectors/Services/OpenAiVectorQueryService.php b/app/Search/Vectors/Services/OpenAiVectorQueryService.php index 8d291099846..e0e145f3ad7 100644 --- a/app/Search/Vectors/Services/OpenAiVectorQueryService.php +++ b/app/Search/Vectors/Services/OpenAiVectorQueryService.php @@ -33,4 +33,25 @@ public function generateEmbeddings(string $text): array return $response['data'][0]['embedding']; } + + public function query(string $input, array $context): string + { + $formattedContext = implode("\n", $context); + + $response = $this->jsonRequest('POST', 'v1/chat/completions', [ + 'model' => 'gpt-4o', + 'messages' => [ + [ + 'role' => 'developer', + 'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response.' + ], + [ + 'role' => 'user', + 'content' => "Provide a response to the below given QUERY using the below given CONTEXT\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}", + ] + ], + ]); + + return $response['choices'][0]['message']['content'] ?? 
''; + } } diff --git a/app/Search/Vectors/Services/VectorQueryService.php b/app/Search/Vectors/Services/VectorQueryService.php index 2cc4ed0178f..746f95f5b22 100644 --- a/app/Search/Vectors/Services/VectorQueryService.php +++ b/app/Search/Vectors/Services/VectorQueryService.php @@ -9,4 +9,13 @@ interface VectorQueryService * @return float[] */ public function generateEmbeddings(string $text): array; + + /** + * Query the LLM service using the given user input, and + * relevant context text retrieved locally via a vector search. + * Returns the response output text from the LLM. + * + * @param string[] $context + */ + public function query(string $input, array $context): string; } diff --git a/app/Search/Vectors/VectorSearchRunner.php b/app/Search/Vectors/VectorSearchRunner.php new file mode 100644 index 00000000000..db28779e403 --- /dev/null +++ b/app/Search/Vectors/VectorSearchRunner.php @@ -0,0 +1,33 @@ +vectorQueryServiceProvider->get(); + $queryVector = $queryService->generateEmbeddings($query); + + // TODO - Apply permissions + // TODO - Join models + $topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id') + ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . 
']"), embedding) as distance') + ->orderBy('distance', 'asc') + ->limit(10) + ->get(); + + $matchesText = array_values(array_map(fn (SearchVector $match) => $match->text, $topMatches->all())); + $llmResult = $queryService->query($query, $matchesText); + + return [ + 'llm_result' => $llmResult, + 'entity_matches' => $topMatches->toArray() + ]; + } +} diff --git a/database/migrations/2025_03_24_155748_create_search_vectors_table.php b/database/migrations/2025_03_24_155748_create_search_vectors_table.php index d7fb0118a2f..1b552b22c9a 100644 --- a/database/migrations/2025_03_24_155748_create_search_vectors_table.php +++ b/database/migrations/2025_03_24_155748_create_search_vectors_table.php @@ -16,10 +16,13 @@ public function up(): void $table->string('entity_type', 100); $table->integer('entity_id'); $table->text('text'); - $table->vector('embedding'); $table->index(['entity_type', 'entity_id']); }); + + $table = DB::getTablePrefix() . 'search_vectors'; + DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)"); + DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine"); } /** diff --git a/resources/views/search/query.blade.php b/resources/views/search/query.blade.php new file mode 100644 index 00000000000..e8b4c84779c --- /dev/null +++ b/resources/views/search/query.blade.php @@ -0,0 +1,29 @@ +@extends('layouts.simple') + +@section('body') +
+ +
+ + +
+ + @if($results) +

Results

+ +

LLM Output

+

{{ $results['llm_result'] }}

+ +

Entity Matches

+ @foreach($results['entity_matches'] as $match) +
+
{{ $match['entity_type'] }}:{{ $match['entity_id'] }}; Distance: {{ $match['distance'] }}
+
+ match text +
{{ $match['text'] }}
+
+
+ @endforeach + @endif +
+@stop diff --git a/routes/web.php b/routes/web.php index ea3efe1ac77..74feda0da2d 100644 --- a/routes/web.php +++ b/routes/web.php @@ -189,6 +189,7 @@ // Search Route::get('/search', [SearchController::class, 'search']); + Route::get('/search/query', [SearchController::class, 'searchQuery']); Route::get('/search/book/{bookId}', [SearchController::class, 'searchBook']); Route::get('/search/chapter/{bookId}', [SearchController::class, 'searchChapter']); Route::get('/search/entity/siblings', [SearchController::class, 'searchSiblings']); From e611b3239e94fe2e60f7d9159961998bce829b74 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 25 Mar 2025 19:38:32 +0000 Subject: [PATCH 3/8] Vectors: Added command to regenerate for all Also made models configurable. Tested system scales via 86k vector entries. --- app/Config/services.php | 2 + .../Commands/RegenerateVectorsCommand.php | 46 +++++++++++++++++++ .../Services/OpenAiVectorQueryService.php | 23 +++++++--- .../Vectors/VectorQueryServiceProvider.php | 4 +- app/Search/Vectors/VectorSearchRunner.php | 1 + ..._24_155748_create_search_vectors_table.php | 2 + 6 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 app/Console/Commands/RegenerateVectorsCommand.php diff --git a/app/Config/services.php b/app/Config/services.php index a34b243f07d..aafe0bacc99 100644 --- a/app/Config/services.php +++ b/app/Config/services.php @@ -30,6 +30,8 @@ 'openai' => [ 'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'), 'key' => env('OPENAI_KEY', ''), + 'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'), + 'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'), ], 'github' => [ diff --git a/app/Console/Commands/RegenerateVectorsCommand.php b/app/Console/Commands/RegenerateVectorsCommand.php new file mode 100644 index 00000000000..700d05300d8 --- /dev/null +++ b/app/Console/Commands/RegenerateVectorsCommand.php @@ -0,0 +1,46 @@ +delete(); + + $types = $entityProvider->all(); + foreach ($types 
as $type => $typeInstance) { + $this->info("Creating jobs to store vectors for {$type} data..."); + /** @var Entity[] $entities */ + $typeInstance->newQuery()->chunkById(100, function ($entities) { + foreach ($entities as $entity) { + dispatch(new StoreEntityVectorsJob($entity)); + } + }); + } + } +} diff --git a/app/Search/Vectors/Services/OpenAiVectorQueryService.php b/app/Search/Vectors/Services/OpenAiVectorQueryService.php index e0e145f3ad7..fea4d5c1445 100644 --- a/app/Search/Vectors/Services/OpenAiVectorQueryService.php +++ b/app/Search/Vectors/Services/OpenAiVectorQueryService.php @@ -6,17 +6,26 @@ class OpenAiVectorQueryService implements VectorQueryService { + protected string $key; + protected string $endpoint; + protected string $embeddingModel; + protected string $queryModel; + public function __construct( - protected string $endpoint, - protected string $key, + protected array $options, protected HttpRequestService $http, ) { + // TODO - Some kind of validation of options + $this->key = $this->options['key'] ?? ''; + $this->endpoint = $this->options['endpoint'] ?? ''; + $this->embeddingModel = $this->options['embedding_model'] ?? ''; + $this->queryModel = $this->options['query_model'] ?? ''; } protected function jsonRequest(string $method, string $uri, array $data): array { $fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/'); - $client = $this->http->buildClient(10); + $client = $this->http->buildClient(30); $request = $this->http->jsonRequest($method, $fullUrl, $data) ->withHeader('Authorization', 'Bearer ' . 
$this->key); @@ -28,7 +37,7 @@ public function generateEmbeddings(string $text): array { $response = $this->jsonRequest('POST', 'v1/embeddings', [ 'input' => $text, - 'model' => 'text-embedding-3-small', + 'model' => $this->embeddingModel, ]); return $response['data'][0]['embedding']; @@ -39,15 +48,15 @@ public function query(string $input, array $context): string $formattedContext = implode("\n", $context); $response = $this->jsonRequest('POST', 'v1/chat/completions', [ - 'model' => 'gpt-4o', + 'model' => $this->queryModel, 'messages' => [ [ 'role' => 'developer', - 'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response.' + 'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.' ], [ 'role' => 'user', - 'content' => "Provide a response to the below given QUERY using the below given CONTEXT\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}", + 'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. 
Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}", ] ], ]); diff --git a/app/Search/Vectors/VectorQueryServiceProvider.php b/app/Search/Vectors/VectorQueryServiceProvider.php index c700307e1f3..eae7149d03c 100644 --- a/app/Search/Vectors/VectorQueryServiceProvider.php +++ b/app/Search/Vectors/VectorQueryServiceProvider.php @@ -18,9 +18,7 @@ public function get(): VectorQueryService $service = $this->getServiceName(); if ($service === 'openai') { - $key = config('services.openai.key'); - $endpoint = config('services.openai.endpoint'); - return new OpenAiVectorQueryService($endpoint, $key, $this->http); + return new OpenAiVectorQueryService(config('services.openai'), $this->http); } throw new \Exception("No '{$service}' LLM service found"); diff --git a/app/Search/Vectors/VectorSearchRunner.php b/app/Search/Vectors/VectorSearchRunner.php index db28779e403..53b1a4bd696 100644 --- a/app/Search/Vectors/VectorSearchRunner.php +++ b/app/Search/Vectors/VectorSearchRunner.php @@ -19,6 +19,7 @@ public function run(string $query): array $topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id') ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance') ->orderBy('distance', 'asc') + ->having('distance', '<', 0.6) ->limit(10) ->get(); diff --git a/database/migrations/2025_03_24_155748_create_search_vectors_table.php b/database/migrations/2025_03_24_155748_create_search_vectors_table.php index 1b552b22c9a..0ae67c2256f 100644 --- a/database/migrations/2025_03_24_155748_create_search_vectors_table.php +++ b/database/migrations/2025_03_24_155748_create_search_vectors_table.php @@ -21,6 +21,8 @@ public function up(): void }); $table = DB::getTablePrefix() . 
'search_vectors'; + + // TODO - Vector size might need to be dynamic DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)"); DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine"); } From 54f883e815e6640564006aa5f5ceb82b79f8f8b8 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 19 Aug 2025 11:04:14 +0100 Subject: [PATCH 4/8] Improved vector text chunking --- app/Search/SearchController.php | 4 + app/Search/Vectors/EntityVectorGenerator.php | 21 +++--- app/Search/Vectors/TextChunker.php | 77 ++++++++++++++++++++ tests/Search/TextChunkerTest.php | 47 ++++++++++++ 4 files changed, 140 insertions(+), 9 deletions(-) create mode 100644 app/Search/Vectors/TextChunker.php create mode 100644 tests/Search/TextChunkerTest.php diff --git a/app/Search/SearchController.php b/app/Search/SearchController.php index a688385e7c3..b5b2b76b679 100644 --- a/app/Search/SearchController.php +++ b/app/Search/SearchController.php @@ -141,8 +141,12 @@ public function searchSiblings(Request $request, SiblingFetcher $siblingFetcher) return view('entities.list-basic', ['entities' => $entities, 'style' => 'compact']); } + /** + * Perform a vector/LLM-based query search. 
+ */ public function searchQuery(Request $request, VectorSearchRunner $runner) { + // TODO - Validate if query system is active $query = $request->get('query', ''); if ($query) { diff --git a/app/Search/Vectors/EntityVectorGenerator.php b/app/Search/Vectors/EntityVectorGenerator.php index 9563694a321..5f2a7c17817 100644 --- a/app/Search/Vectors/EntityVectorGenerator.php +++ b/app/Search/Vectors/EntityVectorGenerator.php @@ -2,6 +2,7 @@ namespace BookStack\Search\Vectors; +use BookStack\Activity\Models\Tag; use BookStack\Entities\Models\Entity; use BookStack\Search\Vectors\Services\VectorQueryService; use Illuminate\Support\Facades\DB; @@ -47,8 +48,10 @@ protected function storeEmbeddings(array $embeddings, array $textChunks, Entity ]; } - // TODO - Chunk inserts - SearchVector::query()->insert($toInsert); + $chunks = array_chunk($toInsert, 500); + foreach ($chunks as $chunk) { + SearchVector::query()->insert($chunk); + } } /** @@ -69,16 +72,16 @@ protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQ */ protected function chunkText(string $text): array { - // TODO - Join adjacent smaller chunks up - return array_filter(array_map(function (string $section): string { - return trim($section); - }, explode("\n", $text))); + return (new TextChunker(500, ["\n", '.', ' ', '']))->chunk($text); } protected function entityToPlainText(Entity $entity): string { - $text = $entity->name . "\n\n" . $entity->{$entity->textField}; - // TODO - Add tags - return $text; + $tags = $entity->tags()->get(); + $tagText = $tags->map(function (Tag $tag) { + return $tag->name . ': ' . $tag->value; + })->join("\n"); // Double-quoted so tags are separated by a real newline, not a literal backslash-n + + return $entity->name . "\n{$tagText}\n" . 
$entity->{$entity->textField}; + } } diff --git a/app/Search/Vectors/TextChunker.php b/app/Search/Vectors/TextChunker.php new file mode 100644 index 00000000000..3ddf1ad4a46 --- /dev/null +++ b/app/Search/Vectors/TextChunker.php @@ -0,0 +1,77 @@ +delimiterOrder) === 0 || $this->delimiterOrder[count($this->delimiterOrder) - 1] !== '') { + $this->delimiterOrder[] = ''; + } + + if ($this->chunkSize < 1) { + throw new InvalidArgumentException('Chunk size must be greater than 0'); + } + } + + public function chunk(string $text): array + { + $delimiter = $this->delimiterOrder[0]; + $delimiterLength = mb_strlen($delimiter); + $lines = ($delimiter === '') ? mb_str_split($text, $this->chunkSize) : explode($delimiter, $text); // Split on characters, not bytes, so multibyte text is never cut mid-character + + $cChunk = ''; // Current chunk + $cLength = 0; // Current chunk length + $chunks = []; // Chunks to return + $lDelim = ''; // Last delimiter + + foreach ($lines as $index => $line) { + $lineLength = mb_strlen($line); + if ($cLength + $lineLength + $delimiterLength <= $this->chunkSize) { + $cChunk .= $line . $delimiter; + $cLength += $lineLength + $delimiterLength; + $lDelim = $delimiter; + } else if ($lineLength <= $this->chunkSize) { + $chunks[] = trim($cChunk, $lDelim); // Trim whichever delimiter was appended last (may be the sub-delimiter) + $cChunk = $line . $delimiter; + $cLength = $lineLength + $delimiterLength; + $lDelim = $delimiter; + } else { + $subChunks = new static($this->chunkSize, array_slice($this->delimiterOrder, 1)); + $subDelimiter = $this->delimiterOrder[1] ?? ''; + $subDelimiterLength = mb_strlen($subDelimiter); + foreach ($subChunks->chunk($line) as $subChunk) { + $chunkLength = mb_strlen($subChunk); + if ($cLength + $chunkLength + $subDelimiterLength <= $this->chunkSize) { + $cChunk .= $subChunk . $subDelimiter; + $cLength += $chunkLength + $subDelimiterLength; + $lDelim = $subDelimiter; + } else { + $chunks[] = trim($cChunk, $lDelim); + $cChunk = $subChunk . 
$subDelimiter; + $cLength = $chunkLength + $subDelimiterLength; + $lDelim = $subDelimiter; + } + } + } + } + + if ($cChunk !== '') { + $chunks[] = trim($cChunk, $lDelim); + } + + return $chunks; + } +} diff --git a/tests/Search/TextChunkerTest.php b/tests/Search/TextChunkerTest.php new file mode 100644 index 00000000000..f78bf11a4ad --- /dev/null +++ b/tests/Search/TextChunkerTest.php @@ -0,0 +1,47 @@ +chunk('123456789'); + + $this->assertEquals(['123', '456', '789'], $chunks); + } + + public function test_chunk_size_must_be_greater_than_zero() + { + $this->expectException(\InvalidArgumentException::class); + $chunker = new TextChunker(-5, []); + } + + public function test_it_works_through_given_delimiters() + { + $chunker = new TextChunker(5, ['-', '.', '']); + $chunks = $chunker->chunk('12-3456.789abcdefg'); + + $this->assertEquals(['12', '3456', '789ab', 'cdefg'], $chunks); + } + + public function test_it_attempts_to_pack_chunks() + { + $chunker = new TextChunker(8, [' ', '']); + $chunks = $chunker->chunk('123 456 789 abc def'); + + $this->assertEquals(['123 456', '789 abc', 'def'], $chunks); + } + + public function test_it_attempts_to_pack_using_subchunks() + { + $chunker = new TextChunker(8, [' ', '-', '']); + $chunks = $chunker->chunk('123 456-789abc'); + + $this->assertEquals(['123 456', '789abc'], $chunks); + } +} From 2c3100e40180289c52c88250e63eefd4a6c52204 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 19 Aug 2025 15:19:04 +0100 Subject: [PATCH 5/8] Vectors: Started front-end work, moved to own controller --- app/Search/QueryController.php | 53 ++++++++++++++++ app/Search/SearchController.php | 21 +------ resources/js/components/index.ts | 1 + resources/js/components/query-manager.ts | 25 ++++++++ resources/sass/_forms.scss | 18 ++++++ resources/views/search/query.blade.php | 79 +++++++++++++++++------- routes/web.php | 6 +- 7 files changed, 159 insertions(+), 44 deletions(-) create mode 100644 app/Search/QueryController.php create mode 100644 
resources/js/components/query-manager.ts diff --git a/app/Search/QueryController.php b/app/Search/QueryController.php new file mode 100644 index 00000000000..895d63f138a --- /dev/null +++ b/app/Search/QueryController.php @@ -0,0 +1,53 @@ +get('ask', ''); + + // TODO - Placeholder + $entities = $this->searchRunner->searchEntities(SearchOptions::fromString("cat"), 'all', 1, 20)['results']; + + // TODO - Set page title + + return view('search.query', [ + 'query' => $query, + 'entities' => $entities, + ]); + } + + /** + * Perform a vector/LLM-based query search. + */ + public function run(Request $request, VectorSearchRunner $runner) + { + // TODO - Validate if query system is active + $query = $request->get('query', ''); + + if ($query) { + $results = $runner->run($query); + } else { + $results = null; + } + + return view('search.query', [ + 'results' => $results, + ]); + } +} diff --git a/app/Search/SearchController.php b/app/Search/SearchController.php index b5b2b76b679..6ae54b3936a 100644 --- a/app/Search/SearchController.php +++ b/app/Search/SearchController.php @@ -129,7 +129,7 @@ public function searchSuggestions(Request $request) } /** - * Search siblings items in the system. + * Search sibling items in the system. */ public function searchSiblings(Request $request, SiblingFetcher $siblingFetcher) { @@ -140,23 +140,4 @@ public function searchSiblings(Request $request, SiblingFetcher $siblingFetcher) return view('entities.list-basic', ['entities' => $entities, 'style' => 'compact']); } - - /** - * Perform a vector/LLM-based query search. 
- */ - public function searchQuery(Request $request, VectorSearchRunner $runner) - { - // TODO - Validate if query system is active - $query = $request->get('query', ''); - - if ($query) { - $results = $runner->run($query); - } else { - $results = null; - } - - return view('search.query', [ - 'results' => $results, - ]); - } } diff --git a/resources/js/components/index.ts b/resources/js/components/index.ts index 63e1ad0dbf7..dcb28abd760 100644 --- a/resources/js/components/index.ts +++ b/resources/js/components/index.ts @@ -44,6 +44,7 @@ export {PagePicker} from './page-picker'; export {PermissionsTable} from './permissions-table'; export {Pointer} from './pointer'; export {Popup} from './popup'; +export {QueryManager} from './query-manager'; export {SettingAppColorScheme} from './setting-app-color-scheme'; export {SettingColorPicker} from './setting-color-picker'; export {SettingHomepageControl} from './setting-homepage-control'; diff --git a/resources/js/components/query-manager.ts b/resources/js/components/query-manager.ts new file mode 100644 index 00000000000..9252c543ddf --- /dev/null +++ b/resources/js/components/query-manager.ts @@ -0,0 +1,25 @@ +import {Component} from "./component"; + +export class QueryManager extends Component { + protected input!: HTMLTextAreaElement; + protected generatedLoading!: HTMLElement; + protected generatedDisplay!: HTMLElement; + protected contentLoading!: HTMLElement; + protected contentDisplay!: HTMLElement; + protected form!: HTMLFormElement; + + setup() { + this.input = this.$refs.input as HTMLTextAreaElement; + this.form = this.$refs.form as HTMLFormElement; + this.generatedLoading = this.$refs.generatedLoading; + this.generatedDisplay = this.$refs.generatedDisplay; + this.contentLoading = this.$refs.contentLoading; + this.contentDisplay = this.$refs.contentDisplay; + + // TODO - Start lookup if query set + + // TODO - Update URL on query change + + // TODO - Handle query form submission + } +} \ No newline at end of 
file diff --git a/resources/sass/_forms.scss b/resources/sass/_forms.scss index 12fb3385f96..61f46201c21 100644 --- a/resources/sass/_forms.scss +++ b/resources/sass/_forms.scss @@ -597,3 +597,21 @@ input.shortcut-input { max-width: 120px; height: auto; } + +.query-form { + display: flex; + flex-direction: row; + gap: vars.$m; + textarea { + font-size: 1.4rem; + height: 100px; + box-shadow: vars.$bs-card; + border-radius: 8px; + color: #444; + } + button { + align-self: start; + margin: 0; + font-size: 1.6rem; + } +} \ No newline at end of file diff --git a/resources/views/search/query.blade.php b/resources/views/search/query.blade.php index e8b4c84779c..48cb1eeafe0 100644 --- a/resources/views/search/query.blade.php +++ b/resources/views/search/query.blade.php @@ -1,29 +1,62 @@ @extends('layouts.simple') @section('body') -
- -
- - -
- - @if($results) -

Results

- -

LLM Output

-

{{ $results['llm_result'] }}

- -

Entity Matches

- @foreach($results['entity_matches'] as $match) -
-
{{ $match['entity_type'] }}:{{ $match['entity_id'] }}; Distance: {{ $match['distance'] }}
-
- match text -
{{ $match['text'] }}
-
+
+ +
+

Start a Query

+
+ + +
+
+ +
+

Generated Response

+
+ @include('common.loading-icon') +
+

Lorem ipsum dolor sit amet, consectetur adipisicing elit. Ad adipisci aliquid architecto cupiditate dolor doloribus eligendi et expedita facilis fugiat fugit illo, ipsa laboriosam maiores, molestias mollitia non obcaecati porro quasi quis quos reprehenderit rerum sunt tenetur ullam unde voluptate voluptates! Distinctio et eum id molestiae nisi quisquam sed ut.

+
+ + +
+

Relevant Content

+
+ @include('common.loading-icon') +
+
+
+ @include('entities.list', ['entities' => $entities, 'showPath' => true, 'showTags' => true])
- @endforeach - @endif +
+
+ +{{-- @if($results)--}} +{{--

Results

--}} + +{{--

LLM Output

--}} +{{--

{{ $results['llm_result'] }}

--}} + +{{--

Entity Matches

--}} +{{-- @foreach($results['entity_matches'] as $match)--}} +{{--
--}} +{{--
{{ $match['entity_type'] }}:{{ $match['entity_id'] }}; Distance: {{ $match['distance'] }}
--}} +{{--
--}} +{{-- match text--}} +{{--
{{ $match['text'] }}
--}} +{{--
--}} +{{--
--}} +{{-- @endforeach--}} +{{-- @endif--}}
@stop diff --git a/routes/web.php b/routes/web.php index 74feda0da2d..9982c044af6 100644 --- a/routes/web.php +++ b/routes/web.php @@ -11,6 +11,7 @@ use BookStack\Http\Middleware\VerifyCsrfToken; use BookStack\Permissions\PermissionsController; use BookStack\References\ReferenceController; +use BookStack\Search\QueryController; use BookStack\Search\SearchController; use BookStack\Settings as SettingControllers; use BookStack\Sorting as SortingControllers; @@ -189,7 +190,6 @@ // Search Route::get('/search', [SearchController::class, 'search']); - Route::get('/search/query', [SearchController::class, 'searchQuery']); Route::get('/search/book/{bookId}', [SearchController::class, 'searchBook']); Route::get('/search/chapter/{bookId}', [SearchController::class, 'searchChapter']); Route::get('/search/entity/siblings', [SearchController::class, 'searchSiblings']); @@ -197,6 +197,10 @@ Route::get('/search/entity-selector-templates', [SearchController::class, 'templatesForSelector']); Route::get('/search/suggest', [SearchController::class, 'searchSuggestions']); + // Queries + Route::get('/query', [QueryController::class, 'show']); + Route::post('/query', [QueryController::class, 'run']); + // User Search Route::get('/search/users/select', [UserControllers\UserSearchController::class, 'forSelect']); From 88ccd9e5b9f45964d616f2fa7d6fa18abb768a33 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Thu, 21 Aug 2025 12:14:52 +0100 Subject: [PATCH 6/8] Vectors: Split out vector search and llm query runs Added a formal object type to carry across vector search results. Added permission application and entity combining with vector search results. Also updated namespace from vectors to queries. 
--- .../Commands/RegenerateVectorsCommand.php | 4 +- .../EntityVectorGenerator.php | 6 ++- app/Search/Queries/LlmQueryRunner.php | 26 +++++++++ app/Search/{ => Queries}/QueryController.php | 19 +++---- app/Search/Queries/SearchVector.php | 26 +++++++++ .../Services/OpenAiVectorQueryService.php | 2 +- .../Services/VectorQueryService.php | 2 +- .../StoreEntityVectorsJob.php | 4 +- .../{Vectors => Queries}/TextChunker.php | 4 +- .../VectorQueryServiceProvider.php | 8 +-- app/Search/Queries/VectorSearchResult.php | 17 ++++++ app/Search/Queries/VectorSearchRunner.php | 54 +++++++++++++++++++ app/Search/SearchController.php | 2 +- app/Search/SearchIndex.php | 4 +- app/Search/Vectors/SearchVector.php | 16 ------ app/Search/Vectors/VectorSearchRunner.php | 34 ------------ routes/web.php | 3 +- tests/Search/TextChunkerTest.php | 2 +- 18 files changed, 155 insertions(+), 78 deletions(-) rename app/Search/{Vectors => Queries}/EntityVectorGenerator.php (95%) create mode 100644 app/Search/Queries/LlmQueryRunner.php rename app/Search/{ => Queries}/QueryController.php (71%) create mode 100644 app/Search/Queries/SearchVector.php rename app/Search/{Vectors => Queries}/Services/OpenAiVectorQueryService.php (98%) rename app/Search/{Vectors => Queries}/Services/VectorQueryService.php (91%) rename app/Search/{Vectors => Queries}/StoreEntityVectorsJob.php (89%) rename app/Search/{Vectors => Queries}/TextChunker.php (97%) rename app/Search/{Vectors => Queries}/VectorQueryServiceProvider.php (79%) create mode 100644 app/Search/Queries/VectorSearchResult.php create mode 100644 app/Search/Queries/VectorSearchRunner.php delete mode 100644 app/Search/Vectors/SearchVector.php delete mode 100644 app/Search/Vectors/VectorSearchRunner.php diff --git a/app/Console/Commands/RegenerateVectorsCommand.php b/app/Console/Commands/RegenerateVectorsCommand.php index 700d05300d8..26259e94345 100644 --- a/app/Console/Commands/RegenerateVectorsCommand.php +++ 
b/app/Console/Commands/RegenerateVectorsCommand.php @@ -4,8 +4,8 @@ use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; -use BookStack\Search\Vectors\SearchVector; -use BookStack\Search\Vectors\StoreEntityVectorsJob; +use BookStack\Search\Queries\SearchVector; +use BookStack\Search\Queries\StoreEntityVectorsJob; use Illuminate\Console\Command; class RegenerateVectorsCommand extends Command diff --git a/app/Search/Vectors/EntityVectorGenerator.php b/app/Search/Queries/EntityVectorGenerator.php similarity index 95% rename from app/Search/Vectors/EntityVectorGenerator.php rename to app/Search/Queries/EntityVectorGenerator.php index 5f2a7c17817..34e37eb0343 100644 --- a/app/Search/Vectors/EntityVectorGenerator.php +++ b/app/Search/Queries/EntityVectorGenerator.php @@ -1,10 +1,12 @@ vectorQueryServiceProvider->get(); + + $matchesText = array_values(array_map(fn (VectorSearchResult $result) => $result->matchText, $vectorResults)); + return $queryService->query($query, $matchesText); + } +} diff --git a/app/Search/QueryController.php b/app/Search/Queries/QueryController.php similarity index 71% rename from app/Search/QueryController.php rename to app/Search/Queries/QueryController.php index 895d63f138a..95888a88f71 100644 --- a/app/Search/QueryController.php +++ b/app/Search/Queries/QueryController.php @@ -1,9 +1,10 @@ get('query', ''); - if ($query) { - $results = $runner->run($query); - } else { - $results = null; - } - - return view('search.query', [ - 'results' => $results, - ]); + $results = $query ? 
$searchRunner->run($query) : []; + $llmResult = $llmRunner->run($query, $results); + dd($results, $llmResult); } } diff --git a/app/Search/Queries/SearchVector.php b/app/Search/Queries/SearchVector.php new file mode 100644 index 00000000000..fcad45da608 --- /dev/null +++ b/app/Search/Queries/SearchVector.php @@ -0,0 +1,26 @@ +hasMany(JointPermission::class, 'entity_id', 'entity_id') + ->whereColumn('search_vectors.entity_type', '=', 'joint_permissions.entity_type'); + } +} diff --git a/app/Search/Vectors/Services/OpenAiVectorQueryService.php b/app/Search/Queries/Services/OpenAiVectorQueryService.php similarity index 98% rename from app/Search/Vectors/Services/OpenAiVectorQueryService.php rename to app/Search/Queries/Services/OpenAiVectorQueryService.php index fea4d5c1445..9bd9080ba11 100644 --- a/app/Search/Vectors/Services/OpenAiVectorQueryService.php +++ b/app/Search/Queries/Services/OpenAiVectorQueryService.php @@ -1,6 +1,6 @@ vectorQueryServiceProvider->get(); + $queryVector = $queryService->generateEmbeddings($query); + + // TODO - Test permissions applied + $topMatchesQuery = SearchVector::query()->select('text', 'entity_type', 'entity_id') + ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . 
']"), embedding) as distance') + ->orderBy('distance', 'asc') + ->having('distance', '<', 0.6) + ->limit(10); + + $query = $this->permissions->restrictEntityRelationQuery($topMatchesQuery, 'search_vectors', 'entity_id', 'entity_type'); + $topMatches = $query->get(); + + $this->entityLoader->loadIntoRelations($topMatches->all(), 'entity', true); + + $results = []; + + foreach ($topMatches as $match) { + if ($match->relationLoaded('entity')) { + $results[] = new VectorSearchResult( + $match->getRelation('entity'), + $match->getAttribute('distance'), + $match->getAttribute('text'), + ); + } + } + + return $results; + } +} diff --git a/app/Search/SearchController.php b/app/Search/SearchController.php index 6ae54b3936a..9050f65f512 100644 --- a/app/Search/SearchController.php +++ b/app/Search/SearchController.php @@ -6,7 +6,7 @@ use BookStack\Entities\Queries\QueryPopular; use BookStack\Entities\Tools\SiblingFetcher; use BookStack\Http\Controller; -use BookStack\Search\Vectors\VectorSearchRunner; +use BookStack\Search\Queries\VectorSearchRunner; use Illuminate\Http\Request; class SearchController extends Controller diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php index 569fddc73f7..aaee97fe747 100644 --- a/app/Search/SearchIndex.php +++ b/app/Search/SearchIndex.php @@ -6,8 +6,8 @@ use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; -use BookStack\Search\Vectors\StoreEntityVectorsJob; -use BookStack\Search\Vectors\VectorQueryServiceProvider; +use BookStack\Search\Queries\StoreEntityVectorsJob; +use BookStack\Search\Queries\VectorQueryServiceProvider; use BookStack\Util\HtmlDocument; use DOMNode; use Illuminate\Database\Eloquent\Builder; diff --git a/app/Search/Vectors/SearchVector.php b/app/Search/Vectors/SearchVector.php deleted file mode 100644 index 4a5555f87d9..00000000000 --- a/app/Search/Vectors/SearchVector.php +++ /dev/null @@ -1,16 +0,0 @@ -vectorQueryServiceProvider->get(); - 
$queryVector = $queryService->generateEmbeddings($query); - - // TODO - Apply permissions - // TODO - Join models - $topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id') - ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance') - ->orderBy('distance', 'asc') - ->having('distance', '<', 0.6) - ->limit(10) - ->get(); - - $matchesText = array_values(array_map(fn (SearchVector $match) => $match->text, $topMatches->all())); - $llmResult = $queryService->query($query, $matchesText); - - return [ - 'llm_result' => $llmResult, - 'entity_matches' => $topMatches->toArray() - ]; - } -} diff --git a/routes/web.php b/routes/web.php index 9982c044af6..d27855100f3 100644 --- a/routes/web.php +++ b/routes/web.php @@ -11,7 +11,7 @@ use BookStack\Http\Middleware\VerifyCsrfToken; use BookStack\Permissions\PermissionsController; use BookStack\References\ReferenceController; -use BookStack\Search\QueryController; +use BookStack\Search\Queries\QueryController; use BookStack\Search\SearchController; use BookStack\Settings as SettingControllers; use BookStack\Sorting as SortingControllers; @@ -199,6 +199,7 @@ // Queries Route::get('/query', [QueryController::class, 'show']); + Route::get('/query/run', [QueryController::class, 'run']); // TODO - Development only, remove Route::post('/query', [QueryController::class, 'run']); // User Search diff --git a/tests/Search/TextChunkerTest.php b/tests/Search/TextChunkerTest.php index f78bf11a4ad..c742c4a6402 100644 --- a/tests/Search/TextChunkerTest.php +++ b/tests/Search/TextChunkerTest.php @@ -2,7 +2,7 @@ namespace Search; -use BookStack\Search\Vectors\TextChunker; +use BookStack\Search\Queries\TextChunker; use Tests\TestCase; class TextChunkerTest extends TestCase From 8eef5a1ee7b423aa07aac83ad0b044616486a3ba Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Thu, 21 Aug 2025 16:03:55 +0100 Subject: [PATCH 7/8] Vectors: Updated query response to use server-side-events 
Allowing the vector query results and the LLM response to each come back over the same HTTP request at two different times via a somewhat standard. Uses a package for JS SSE client, since native browser client does not support over POST, which is probably important for this endpoint as we don't want crawlers or other bots abusing this via accidentally. --- app/Search/Queries/QueryController.php | 11 +++++--- package-lock.json | 22 ++++++++++++++++ package.json | 1 + resources/js/components/query-manager.ts | 32 ++++++++++++++++++++++++ resources/js/services/http.ts | 29 ++++++++++++++++++--- 5 files changed, 88 insertions(+), 7 deletions(-) diff --git a/app/Search/Queries/QueryController.php b/app/Search/Queries/QueryController.php index 95888a88f71..cfaf2e9205e 100644 --- a/app/Search/Queries/QueryController.php +++ b/app/Search/Queries/QueryController.php @@ -41,8 +41,13 @@ public function run(Request $request, VectorSearchRunner $searchRunner, LlmQuery // TODO - Validate if query system is active $query = $request->get('query', ''); - $results = $query ? $searchRunner->run($query) : []; - $llmResult = $llmRunner->run($query, $results); - dd($results, $llmResult); + return response()->eventStream(function () use ($query, $searchRunner, $llmRunner) { + $results = $query ? 
$searchRunner->run($query) : []; + + $count = count($results); + yield "Found {$count} results for query: {$query}!"; + $llmResult = $llmRunner->run($query, $results); + yield "LLM result: {$llmResult}"; + }); } } diff --git a/package-lock.json b/package-lock.json index 079e397700a..86bdc05e8b0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -23,6 +23,7 @@ "@ssddanbrown/codemirror-lang-twig": "^1.0.0", "@types/jest": "^29.5.14", "codemirror": "^6.0.1", + "eventsource-client": "^1.1.4", "idb-keyval": "^6.2.1", "markdown-it": "^14.1.0", "markdown-it-task-lists": "^2.1.1", @@ -4336,6 +4337,27 @@ "node": ">=0.10.0" } }, + "node_modules/eventsource-client": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/eventsource-client/-/eventsource-client-1.1.4.tgz", + "integrity": "sha512-CKnqZTwXCnHN2EqrEB9eLSjMMRqHum09VOsikkgSPoa2Jr2XgQnX7P1Fxhnnj/UHxi3GQ2xVsXDKIktEes07bg==", + "license": "MIT", + "dependencies": { + "eventsource-parser": "^3.0.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.5.tgz", + "integrity": "sha512-bSRG85ZrMdmWtm7qkF9He9TNRzc/Bm99gEJMaQoHJ9E6Kv9QBbsldh2oMj7iXmYNEAVvNgvv5vPorG6W+XtBhQ==", + "license": "MIT", + "engines": { + "node": ">=20.0.0" + } + }, "node_modules/execa": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", diff --git a/package.json b/package.json index 151338d8c6e..637457a9369 100644 --- a/package.json +++ b/package.json @@ -55,6 +55,7 @@ "@ssddanbrown/codemirror-lang-twig": "^1.0.0", "@types/jest": "^29.5.14", "codemirror": "^6.0.1", + "eventsource-client": "^1.1.4", "idb-keyval": "^6.2.1", "markdown-it": "^14.1.0", "markdown-it-task-lists": "^2.1.1", diff --git a/resources/js/components/query-manager.ts b/resources/js/components/query-manager.ts index 9252c543ddf..40a71489b07 100644 --- 
a/resources/js/components/query-manager.ts +++ b/resources/js/components/query-manager.ts @@ -1,4 +1,5 @@ import {Component} from "./component"; +import {createEventSource} from "eventsource-client"; export class QueryManager extends Component { protected input!: HTMLTextAreaElement; @@ -21,5 +22,36 @@ export class QueryManager extends Component { // TODO - Update URL on query change // TODO - Handle query form submission + this.form.addEventListener('submit', event => { + event.preventDefault(); + this.runQuery(); + }); + } + + async runQuery() { + this.contentLoading.hidden = false; + this.generatedLoading.hidden = false; + this.contentDisplay.innerHTML = ''; + this.generatedDisplay.innerHTML = ''; + + const query = this.input.value; + const es = window.$http.eventSource('/query', 'POST', {query}); + + let messageCount = 0; + for await (const {data, event, id} of es) { + messageCount++; + if (messageCount === 1) { + // Entity results + this.contentDisplay.innerText = data; // TODO - Update to HTML + this.contentLoading.hidden = true; + } else if (messageCount === 2) { + // LLM Output + this.generatedDisplay.innerText = data; // TODO - Update to HTML + this.generatedLoading.hidden = true; + } else { + es.close() + break; + } + } } } \ No newline at end of file diff --git a/resources/js/services/http.ts b/resources/js/services/http.ts index f9eaafc3912..07f150220b4 100644 --- a/resources/js/services/http.ts +++ b/resources/js/services/http.ts @@ -1,3 +1,5 @@ +import {createEventSource, EventSourceClient} from "eventsource-client"; + type ResponseData = Record|string; type RequestOptions = { @@ -59,7 +61,6 @@ export class HttpManager { } createXMLHttpRequest(method: string, url: string, events: Record void> = {}): XMLHttpRequest { - const csrfToken = document.querySelector('meta[name=token]')?.getAttribute('content'); const req = new XMLHttpRequest(); for (const [eventName, callback] of Object.entries(events)) { @@ -68,7 +69,7 @@ export class HttpManager { 
req.open(method, url); req.withCredentials = true; - req.setRequestHeader('X-CSRF-TOKEN', csrfToken || ''); + req.setRequestHeader('X-CSRF-TOKEN', this.getCSRFToken()); return req; } @@ -95,12 +96,11 @@ export class HttpManager { requestUrl = urlObj.toString(); } - const csrfToken = document.querySelector('meta[name=token]')?.getAttribute('content') || ''; const requestOptions: RequestInit = {...options, credentials: 'same-origin'}; requestOptions.headers = { ...requestOptions.headers || {}, baseURL: window.baseUrl(''), - 'X-CSRF-TOKEN': csrfToken, + 'X-CSRF-TOKEN': this.getCSRFToken(), }; const response = await fetch(requestUrl, requestOptions); @@ -191,6 +191,27 @@ export class HttpManager { return this.dataRequest('DELETE', url, data); } + eventSource(url: string, method: string = 'GET', body: object = {}): EventSourceClient { + if (!url.startsWith('http')) { + url = window.baseUrl(url); + } + + return createEventSource({ + url, + method, + body: JSON.stringify(body), + credentials: 'same-origin', + headers: { + 'Content-Type': 'application/json', + 'X-CSRF-TOKEN': this.getCSRFToken(), + } + }); + } + + protected getCSRFToken(): string { + return document.querySelector('meta[name=token]')?.getAttribute('content') || ''; + } + /** * Parse the response text for an error response to a user * presentable string. 
Handles a range of errors responses including From bb08f62327a7c7feab901bd89b832da79b832a3f Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 22 Aug 2025 12:59:32 +0100 Subject: [PATCH 8/8] Vectors: Finished core fetch & display functionality --- app/Search/Queries/QueryController.php | 30 +++++++++----- resources/js/components/query-manager.ts | 38 +++++++++++++---- resources/sass/_forms.scss | 8 ++++ resources/views/search/query.blade.php | 52 ++++++++++-------------- 4 files changed, 77 insertions(+), 51 deletions(-) diff --git a/app/Search/Queries/QueryController.php b/app/Search/Queries/QueryController.php index cfaf2e9205e..4d8c71b6184 100644 --- a/app/Search/Queries/QueryController.php +++ b/app/Search/Queries/QueryController.php @@ -3,7 +3,6 @@ namespace BookStack\Search\Queries; use BookStack\Http\Controller; -use BookStack\Search\SearchOptions; use BookStack\Search\SearchRunner; use Illuminate\Http\Request; @@ -12,6 +11,13 @@ class QueryController extends Controller public function __construct( protected SearchRunner $searchRunner, ) { + // TODO - Check via testing + $this->middleware(function ($request, $next) { + if (!VectorQueryServiceProvider::isEnabled()) { + $this->showPermissionError('/'); + } + return $next($request); + }); } /** @@ -19,17 +25,12 @@ public function __construct( */ public function show(Request $request) { - // TODO - Validate if query system is active $query = $request->get('ask', ''); - // TODO - Placeholder - $entities = $this->searchRunner->searchEntities(SearchOptions::fromString("cat"), 'all', 1, 20)['results']; - // TODO - Set page title return view('search.query', [ 'query' => $query, - 'entities' => $entities, ]); } @@ -38,16 +39,23 @@ public function show(Request $request) */ public function run(Request $request, VectorSearchRunner $searchRunner, LlmQueryRunner $llmRunner) { - // TODO - Validate if query system is active + // TODO - Rate limiting $query = $request->get('query', ''); return 
response()->eventStream(function () use ($query, $searchRunner, $llmRunner) { $results = $query ? $searchRunner->run($query) : []; - $count = count($results); - yield "Found {$count} results for query: {$query}!"; - $llmResult = $llmRunner->run($query, $results); - yield "LLM result: {$llmResult}"; + $entities = []; + foreach ($results as $result) { + $entityKey = $result->entity->getMorphClass() . ':' . $result->entity->id; + if (!isset($entities[$entityKey])) { + $entities[$entityKey] = $result->entity; + } + } + + yield ['view' => view('entities.list', ['entities' => $entities])->render()]; + + yield ['result' => $llmRunner->run($query, $results)]; }); } } diff --git a/resources/js/components/query-manager.ts b/resources/js/components/query-manager.ts index 40a71489b07..91bd63a2293 100644 --- a/resources/js/components/query-manager.ts +++ b/resources/js/components/query-manager.ts @@ -1,5 +1,4 @@ import {Component} from "./component"; -import {createEventSource} from "eventsource-client"; export class QueryManager extends Component { protected input!: HTMLTextAreaElement; @@ -8,33 +7,52 @@ export class QueryManager extends Component { protected contentLoading!: HTMLElement; protected contentDisplay!: HTMLElement; protected form!: HTMLFormElement; + protected fieldset!: HTMLFieldSetElement; setup() { this.input = this.$refs.input as HTMLTextAreaElement; this.form = this.$refs.form as HTMLFormElement; + this.fieldset = this.$refs.fieldset as HTMLFieldSetElement; this.generatedLoading = this.$refs.generatedLoading; this.generatedDisplay = this.$refs.generatedDisplay; this.contentLoading = this.$refs.contentLoading; this.contentDisplay = this.$refs.contentDisplay; - // TODO - Start lookup if query set + this.setupListeners(); - // TODO - Update URL on query change + // Start lookup if a query is set + if (this.input.value.trim() !== '') { + this.runQuery(); + } + } - // TODO - Handle query form submission + protected setupListeners(): void { + // Handle form 
submission this.form.addEventListener('submit', event => { event.preventDefault(); this.runQuery(); }); + + // Allow Ctrl+Enter to run a query + this.input.addEventListener('keydown', event => { + if (event.key === 'Enter' && event.ctrlKey && this.input.value.trim() !== '') { + this.runQuery(); + } + }); } - async runQuery() { + protected async runQuery(): Promise { this.contentLoading.hidden = false; this.generatedLoading.hidden = false; this.contentDisplay.innerHTML = ''; this.generatedDisplay.innerHTML = ''; + this.fieldset.disabled = true; + + const query = this.input.value.trim(); + const url = new URL(window.location.href); + url.searchParams.set('ask', query); + window.history.pushState({}, '', url.toString()); - const query = this.input.value; const es = window.$http.eventSource('/query', 'POST', {query}); let messageCount = 0; @@ -42,16 +60,18 @@ export class QueryManager extends Component { messageCount++; if (messageCount === 1) { // Entity results - this.contentDisplay.innerText = data; // TODO - Update to HTML + this.contentDisplay.innerHTML = JSON.parse(data).view; this.contentLoading.hidden = true; } else if (messageCount === 2) { // LLM Output - this.generatedDisplay.innerText = data; // TODO - Update to HTML + this.generatedDisplay.innerText = JSON.parse(data).result; this.generatedLoading.hidden = true; } else { - es.close() + es.close(); break; } } + + this.fieldset.disabled = false; } } \ No newline at end of file diff --git a/resources/sass/_forms.scss b/resources/sass/_forms.scss index 61f46201c21..ff17cf52745 100644 --- a/resources/sass/_forms.scss +++ b/resources/sass/_forms.scss @@ -614,4 +614,12 @@ input.shortcut-input { margin: 0; font-size: 1.6rem; } + button:disabled { + opacity: 0.5; + cursor: not-allowed; + } + textarea:disabled { + opacity: 0.5; + cursor: not-allowed; + } } \ No newline at end of file diff --git a/resources/views/search/query.blade.php b/resources/views/search/query.blade.php index 48cb1eeafe0..3293c0ddc9b 100644 --- 
a/resources/views/search/query.blade.php +++ b/resources/views/search/query.blade.php @@ -8,55 +8,45 @@
- - + method="post"> +
+ + +

Generated Response

-
+ -

Lorem ipsum dolor sit amet, consectetur adipisicing elit. Ad adipisci aliquid architecto cupiditate dolor doloribus eligendi et expedita facilis fugiat fugit illo, ipsa laboriosam maiores, molestias mollitia non obcaecati porro quasi quis quos reprehenderit rerum sunt tenetur ullam unde voluptate voluptates! Distinctio et eum id molestiae nisi quisquam sed ut.

+

+ + When you run a query, the relevant content found & shown below will be used to help generate a smart machine generated response. + +

Relevant Content

-
+
- @include('entities.list', ['entities' => $entities, 'showPath' => true, 'showTags' => true]) +

+ Start a query to find relevant matching content. + The items shown here reflect those used to help provide the above response. +

- -{{-- @if($results)--}} -{{--

Results

--}} - -{{--

LLM Output

--}} -{{--

{{ $results['llm_result'] }}

--}} - -{{--

Entity Matches

--}} -{{-- @foreach($results['entity_matches'] as $match)--}} -{{--
--}} -{{--
{{ $match['entity_type'] }}:{{ $match['entity_id'] }}; Distance: {{ $match['distance'] }}
--}} -{{--
--}} -{{-- match text--}} -{{--
{{ $match['text'] }}
--}} -{{--
--}} -{{--
--}} -{{-- @endforeach--}} -{{-- @endif--}}
@stop