Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions app/Config/services.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@
// Callback URL for social authentication methods
'callback_url' => env('APP_URL', false),

// LLM Service
// Options: openai
'llm' => env('LLM_SERVICE', ''),

// OpenAI API-compatible service details
'openai' => [
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
'key' => env('OPENAI_KEY', ''),
'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'),
'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'),
],

'github' => [
'client_id' => env('GITHUB_APP_ID', false),
'client_secret' => env('GITHUB_APP_SECRET', false),
Expand Down
46 changes: 46 additions & 0 deletions app/Console/Commands/RegenerateVectorsCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

namespace BookStack\Console\Commands;

use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Search\Queries\SearchVector;
use BookStack\Search\Queries\StoreEntityVectorsJob;
use Illuminate\Console\Command;

class RegenerateVectorsCommand extends Command
{
    /**
     * Signature used to invoke this command from the console.
     *
     * @var string
     */
    protected $signature = 'bookstack:regenerate-vectors';

    /**
     * Human-readable description of this console command.
     *
     * @var string
     */
    protected $description = 'Re-index vectors for all content in the system';

    /**
     * Delete every stored search vector, then walk through each entity type
     * and dispatch a vector storage job for each entity found.
     */
    public function handle(EntityProvider $entityProvider)
    {
        // TODO - Add confirmation before run regarding deletion/time/effort/api-cost etc...
        SearchVector::query()->delete();

        foreach ($entityProvider->all() as $type => $model) {
            $this->info("Creating jobs to store vectors for {$type} data...");

            // Dispatches one job per entity within a fetched batch.
            $dispatchBatch = function ($batch) {
                /** @var Entity $item */
                foreach ($batch as $item) {
                    dispatch(new StoreEntityVectorsJob($item));
                }
            };

            // Entities are fetched in batches of 100 rather than all at once.
            $model->newQuery()->chunkById(100, $dispatchBatch);
        }
    }
}
89 changes: 89 additions & 0 deletions app/Search/Queries/EntityVectorGenerator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?php

declare(strict_types=1);

namespace BookStack\Search\Queries;

use BookStack\Activity\Models\Tag;
use BookStack\Entities\Models\Entity;
use BookStack\Search\Queries\Services\VectorQueryService;
use Illuminate\Support\Facades\DB;

/**
 * Generates embedding vectors for the text content of an entity
 * and stores them as SearchVector rows.
 */
class EntityVectorGenerator
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Build the plain text for the given entity, split it into chunks, generate
     * an embedding for each chunk, then replace any vectors already stored for
     * the entity with the newly generated set.
     */
    public function generateAndStore(Entity $entity): void
    {
        $vectorService = $this->vectorQueryServiceProvider->get();

        $text = $this->entityToPlainText($entity);
        $chunks = $this->chunkText($text);
        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);

        // Existing rows are only removed once the new embeddings have been generated.
        $this->deleteExistingEmbeddingsForEntity($entity);
        $this->storeEmbeddings($embeddings, $chunks, $entity);
    }

    /**
     * Remove all stored vectors which belong to the given entity.
     */
    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
    {
        SearchVector::query()
            ->where('entity_type', '=', $entity->getMorphClass())
            ->where('entity_id', '=', $entity->id)
            ->delete();
    }

    /**
     * Store the given embeddings, alongside the text chunk each was generated from,
     * against the given entity. The two arrays are expected to share the same keys.
     *
     * @param array<int, array<int, float>> $embeddings
     * @param string[] $textChunks
     */
    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
    {
        $toInsert = [];

        foreach ($embeddings as $index => $embedding) {
            // The vector is concatenated into a raw SQL expression, so every value is
            // forced to a float to ensure only numeric content can reach the query.
            $values = array_map(static fn ($value): float => (float) $value, $embedding);

            $toInsert[] = [
                'entity_id' => $entity->id,
                'entity_type' => $entity->getMorphClass(),
                'embedding' => DB::raw('VEC_FROMTEXT("[' . implode(',', $values) . ']")'),
                'text' => $textChunks[$index],
            ];
        }

        // Insert in batches of 500 rows rather than in one single statement.
        foreach (array_chunk($toInsert, 500) as $batch) {
            SearchVector::query()->insert($batch);
        }
    }

    /**
     * Generate an embedding for each of the given text chunks.
     * The returned array uses the same keys as the provided chunks.
     *
     * @param string[] $chunks
     * @return array<int, array<int, float>>
     */
    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
    {
        $embeddings = [];
        foreach ($chunks as $index => $chunk) {
            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
        }

        return $embeddings;
    }

    /**
     * Split the given text into chunks using TextChunker.
     * NOTE(review): 500 is presumably the target chunk size, and the list the
     * delimiters to split upon in order of preference - confirm against TextChunker.
     *
     * @return string[]
     */
    protected function chunkText(string $text): array
    {
        return (new TextChunker(500, ["\n", '.', ' ', '']))->chunk($text);
    }

    /**
     * Build the plain text to be embedded for the given entity: its name,
     * then a "name: value" line per tag, then the content of its text field.
     */
    protected function entityToPlainText(Entity $entity): string
    {
        $tags = $entity->tags()->get();
        $tagText = $tags->map(function (Tag $tag) {
            return $tag->name . ': ' . $tag->value;
        })->join("\n"); // Double quotes are required for a real newline; '\n' is a literal backslash + n.

        return $entity->name . "\n{$tagText}\n" . $entity->{$entity->textField};
    }
}
26 changes: 26 additions & 0 deletions app/Search/Queries/LlmQueryRunner.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

namespace BookStack\Search\Queries;

use Exception;

class LlmQueryRunner
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider,
    ) {
    }

    /**
     * Ask the configured LLM service for a text response to the given query,
     * supplying the matched text of each vector result as context.
     *
     * @param VectorSearchResult[] $vectorResults
     * @throws Exception
     */
    public function run(string $query, array $vectorResults): string
    {
        $llmService = $this->vectorQueryServiceProvider->get();

        // Context is passed on as a plain sequential list of matched text.
        $toMatchText = fn (VectorSearchResult $result) => $result->matchText;
        $contextLines = array_map($toMatchText, array_values($vectorResults));

        return $llmService->query($query, $contextLines);
    }
}
61 changes: 61 additions & 0 deletions app/Search/Queries/QueryController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?php

namespace BookStack\Search\Queries;

use BookStack\Http\Controller;
use BookStack\Search\SearchRunner;
use Illuminate\Http\Request;

class QueryController extends Controller
{
    public function __construct(
        protected SearchRunner $searchRunner,
    ) {
        // TODO - Check via testing
        $this->middleware(function ($request, $next) {
            $featureEnabled = VectorQueryServiceProvider::isEnabled();
            if (!$featureEnabled) {
                $this->showPermissionError('/');
            }

            return $next($request);
        });
    }

    /**
     * Display the page from which a vector/LLM-based query can be started,
     * pre-filled with any query provided via the "ask" request parameter.
     */
    public function show(Request $request)
    {
        // TODO - Set page title

        return view('search.query', ['query' => $request->get('ask', '')]);
    }

    /**
     * Run a vector/LLM-based query search, responding with an event stream which
     * provides a rendered list of the matched entities followed by the LLM response text.
     */
    public function run(Request $request, VectorSearchRunner $searchRunner, LlmQueryRunner $llmRunner)
    {
        // TODO - Rate limiting
        $query = $request->get('query', '');

        $stream = function () use ($query, $searchRunner, $llmRunner) {
            $results = [];
            if ($query) {
                $results = $searchRunner->run($query);
            }

            // Collect each matched entity once only, keeping the first occurrence,
            // since many results may point to the same entity.
            $entities = [];
            foreach ($results as $result) {
                $entity = $result->entity;
                $entities[$entity->getMorphClass() . ':' . $entity->id] ??= $entity;
            }

            yield ['view' => view('entities.list', ['entities' => $entities])->render()];

            yield ['result' => $llmRunner->run($query, $results)];
        };

        return response()->eventStream($stream);
    }
}
26 changes: 26 additions & 0 deletions app/Search/Queries/SearchVector.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

declare(strict_types=1);

namespace BookStack\Search\Queries;

use BookStack\Permissions\Models\JointPermission;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\HasMany;

/**
 * A stored embedding for a chunk of entity text.
 *
 * @property string $entity_type
 * @property int $entity_id
 * @property string $text
 * @property string $embedding
 */
class SearchVector extends Model
{
    public $timestamps = false;

    /**
     * The joint permissions which share both the entity ID and entity type of this vector.
     */
    public function jointPermissions(): HasMany
    {
        // The relation itself only matches on entity_id, so the
        // entity type is matched through an extra column comparison.
        $byEntityId = $this->hasMany(JointPermission::class, 'entity_id', 'entity_id');

        return $byEntityId->whereColumn('search_vectors.entity_type', '=', 'joint_permissions.entity_type');
    }
}
66 changes: 66 additions & 0 deletions app/Search/Queries/Services/OpenAiVectorQueryService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?php

namespace BookStack\Search\Queries\Services;

use BookStack\Http\HttpRequestService;

/**
 * VectorQueryService implementation which talks to an OpenAI API-compatible
 * service for embedding generation and chat-completion based querying.
 */
class OpenAiVectorQueryService implements VectorQueryService
{
    protected string $key;
    protected string $endpoint;
    protected string $embeddingModel;
    protected string $queryModel;

    /**
     * @param array $options Service options. The 'key', 'endpoint', 'embedding_model'
     *                       and 'query_model' keys are read, each defaulting to an
     *                       empty string when absent.
     */
    public function __construct(
        protected array $options,
        protected HttpRequestService $http,
    ) {
        // TODO - Some kind of validation of options
        $this->key = $this->options['key'] ?? '';
        $this->endpoint = $this->options['endpoint'] ?? '';
        $this->embeddingModel = $this->options['embedding_model'] ?? '';
        $this->queryModel = $this->options['query_model'] ?? '';
    }

    /**
     * Send a JSON request, authorized via the configured key, to the given URI
     * relative to the configured endpoint, and return the decoded JSON response.
     *
     * @throws \RuntimeException If the response body does not decode to a JSON object/array.
     */
    protected function jsonRequest(string $method, string $uri, array $data): array
    {
        // Normalise slashes so endpoint and URI always join with exactly one "/".
        $fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
        $client = $this->http->buildClient(30);
        $request = $this->http->jsonRequest($method, $fullUrl, $data)
            ->withHeader('Authorization', 'Bearer ' . $this->key);

        $response = $client->sendRequest($request);
        $decoded = json_decode($response->getBody()->getContents(), true);

        // json_decode gives null for invalid JSON, and scalars for scalar JSON values.
        // Fail with a clear message here instead of a return-type error.
        if (!is_array($decoded)) {
            throw new \RuntimeException("Unexpected non-JSON response received from LLM service request to {$fullUrl}");
        }

        return $decoded;
    }

    /**
     * Generate an embedding vector for the given text via the "v1/embeddings" endpoint.
     *
     * @return float[]
     * @throws \RuntimeException If the response does not contain an embedding.
     */
    public function generateEmbeddings(string $text): array
    {
        $response = $this->jsonRequest('POST', 'v1/embeddings', [
            'input' => $text,
            'model' => $this->embeddingModel,
        ]);

        // A single input was sent, so only the first returned embedding is used.
        $embedding = $response['data'][0]['embedding'] ?? null;
        if (!is_array($embedding)) {
            throw new \RuntimeException('No embedding data found in LLM service embeddings response');
        }

        return $embedding;
    }

    /**
     * Query the "v1/chat/completions" endpoint using the given user input, with
     * the given context lines provided as part of the prompt.
     * Returns the content of the first response choice, or an empty string if
     * the response contains no such content.
     *
     * @param string[] $context
     */
    public function query(string $input, array $context): string
    {
        $formattedContext = implode("\n", $context);

        $response = $this->jsonRequest('POST', 'v1/chat/completions', [
            'model' => $this->queryModel,
            'messages' => [
                [
                    'role' => 'developer',
                    'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.'
                ],
                [
                    'role' => 'user',
                    'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
                ]
            ],
        ]);

        return $response['choices'][0]['message']['content'] ?? '';
    }
}
21 changes: 21 additions & 0 deletions app/Search/Queries/Services/VectorQueryService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

namespace BookStack\Search\Queries\Services;

interface VectorQueryService
{
/**
* Generate an embedding vector from the given chunk of text.
* A single vector, as a flat list of floats, is returned for the
* whole of the text provided.
*
* @param string $text The chunk of text to generate an embedding for.
* @return float[]
*/
public function generateEmbeddings(string $text): array;

/**
* Query the LLM service using the given user input, and
* relevant context text retrieved locally via a vector search.
* Returns the response output text from the LLM.
*
* @param string $input The query text, as provided by the user.
* @param string[] $context Text matches to supply to the LLM alongside the input.
* @return string The text response provided by the LLM.
*/
public function query(string $input, array $context): string;
}
30 changes: 30 additions & 0 deletions app/Search/Queries/StoreEntityVectorsJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

declare(strict_types=1);

namespace BookStack\Search\Queries;

use BookStack\Entities\Models\Entity;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Queue\Queueable;

class StoreEntityVectorsJob implements ShouldQueue
{
    use Queueable;

    /**
     * The entity which vectors are to be generated and stored for.
     */
    protected Entity $entity;

    /**
     * Build a new job for the given entity.
     */
    public function __construct(Entity $entity)
    {
        $this->entity = $entity;
    }

    /**
     * Run the job, generating and storing vectors for the held entity.
     */
    public function handle(EntityVectorGenerator $generator): void
    {
        $generator->generateAndStore($this->entity);
    }
}
Loading
Loading