From c4a6585800b006295ada974353d0e22e7963ee64 Mon Sep 17 00:00:00 2001 From: ruttydm Date: Fri, 10 Apr 2026 11:31:45 +0200 Subject: [PATCH] Implement runtime memory alignment pipeline --- AGENTS.md | 61 + CLAUDE.md | 8 + app/Agents/OpenCompanyAgent.php | 89 +- app/Agents/Providers/CodexPrismGateway.php | 10 +- app/Agents/Providers/GlmPrismGateway.php | 10 +- app/Agents/Tools/ToolRegistry.php | 33 +- app/Jobs/AgentRespondJob.php | 46 +- app/Jobs/ExecuteAgentTaskJob.php | 4 + app/Jobs/RunAutomationJob.php | 11 +- app/Listeners/CheckpointToolCall.php | 8 +- app/Models/ConversationSummary.php | 9 + app/Providers/AppServiceProvider.php | 64 +- app/Services/AgentPermissionService.php | 21 +- app/Services/LuaApiDocGenerator.php | 131 +- app/Services/LuaSandboxService.php | 88 + .../Memory/CompactionMemoryExtractor.php | 41 + app/Services/Memory/CompactionPlan.php | 25 + app/Services/Memory/ContextBudget.php | 92 + app/Services/Memory/ContextPruner.php | 143 ++ .../Memory/ConversationCompactionService.php | 223 +- app/Services/Memory/MemoryFlushService.php | 40 +- app/Services/Memory/ModelContextRegistry.php | 44 +- app/Services/Memory/OutputTruncator.php | 60 + app/Services/Memory/PromptFrameBuilder.php | 78 + .../Memory/ToolResultDeduplicator.php | 124 ++ config/memory.php | 60 + ...acking_to_conversation_summaries_table.php | 30 + docs/INDEX.md | 8 +- docs/architecture/kosmokrator-reuse-audit.md | 953 ++++++++ .../runtime-alignment-implementation-audit.md | 21 + docs/external-channel-sync.md | 591 +---- docs/planning/implementation-todo.md | 1968 ----------------- ...kosmokrator-runtime-alignment-checklist.md | 49 + docs/planning/memory-implementation.md | 1863 +--------------- docs/testing/feature-test-map.md | 1093 --------- docs/todo.md | 38 - resources/lua-docs/_overview.md | 78 +- tests/Feature/OpenCompanyAgentTest.php | 63 +- .../ConversationCompactionServiceTest.php | 32 +- .../Memory/MemoryFlushServiceTest.php | 18 +- .../Memory/ModelContextRegistryTest.php | 16 
+ tests/Unit/ContextPrunerTest.php | 58 + tests/Unit/OutputTruncatorTest.php | 30 + tests/Unit/PrismMessagesTest.php | 46 + tests/Unit/PromptFrameBuilderTest.php | 26 + tests/Unit/ToolResultDeduplicatorTest.php | 75 + 46 files changed, 2847 insertions(+), 5732 deletions(-) create mode 100644 AGENTS.md create mode 100644 app/Services/Memory/CompactionMemoryExtractor.php create mode 100644 app/Services/Memory/CompactionPlan.php create mode 100644 app/Services/Memory/ContextBudget.php create mode 100644 app/Services/Memory/ContextPruner.php create mode 100644 app/Services/Memory/OutputTruncator.php create mode 100644 app/Services/Memory/PromptFrameBuilder.php create mode 100644 app/Services/Memory/ToolResultDeduplicator.php create mode 100644 database/migrations/2026_04_09_120000_add_compaction_failure_tracking_to_conversation_summaries_table.php create mode 100644 docs/architecture/kosmokrator-reuse-audit.md create mode 100644 docs/architecture/runtime-alignment-implementation-audit.md delete mode 100644 docs/planning/implementation-todo.md create mode 100644 docs/planning/kosmokrator-runtime-alignment-checklist.md delete mode 100644 docs/testing/feature-test-map.md delete mode 100644 docs/todo.md create mode 100644 tests/Unit/ContextPrunerTest.php create mode 100644 tests/Unit/OutputTruncatorTest.php create mode 100644 tests/Unit/PrismMessagesTest.php create mode 100644 tests/Unit/PromptFrameBuilderTest.php create mode 100644 tests/Unit/ToolResultDeduplicatorTest.php diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..76dcf93 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,61 @@ +# OpenCompany Agent Guide + +This file is the agent-facing operating guide for work inside the OpenCompany repository. + +## Project Basics + +- App URL for local development: `http://opencompany.test` +- Stack: +- Laravel 12 +- Vue 3 + Inertia.js +- Tailwind CSS v4 +- Reka UI primitives + +## Workspace Rules + +- OpenCompany is multi-workspace. +- Most data is workspace-scoped. 
+- Current workspace is resolved by middleware and available through `workspace()`. +- When adding queries, always scope them correctly. +- For models with `workspace_id`, use `forWorkspace()`. +- For related models, scope through the relationship with `whereHas(...)` or equivalent. + +## Agent Runtime Notes + +- The main runtime agent class is `app/Agents/OpenCompanyAgent.php`. +- Identity/system-prompt content is assembled from identity files and agent config, not from a static hardcoded prompt. +- The repo historically referenced `AGENTS.md` as an architectural concept, but this app currently stores agent instructions through its identity-file/document system. + +## UI Rules + +- Shared UI components live in `resources/js/Components/shared/`. +- Prefer wrapper components over raw elements when equivalents already exist. +- Dark mode exists and should not be broken by new UI work. + +## MCP CLI + +- MCP CLI is installed at `~/.local/bin/mcp-cli`. +- Config is at `~/.config/mcp/mcp_servers.json`. +- Common usage: +- `mcp-cli` +- `mcp-cli info <server>` +- `mcp-cli call <server> <tool> '<json>'` +- Connected servers currently include: +- `founder-mode` +- `notion` +- `vibe_kanban` +- `plane` + +## Repo Conventions + +- Prefer `rg` and `rg --files` for search. +- Keep edits targeted. Do not revert unrelated user changes. +- Do not patch `vendor/` for durable product work unless the task is explicitly temporary or exploratory. +- Put audits and investigations into markdown docs under `docs/`. + +## Current Documentation Anchors + +- Repo rules and local setup: `CLAUDE.md` +- Docs index: `docs/INDEX.md` +- Runtime audit: `docs/architecture/runtime-alignment-implementation-audit.md` +- Plane issue OC-1 investigation: `docs/architecture/plane-oc-1-investigation.md` diff --git a/CLAUDE.md b/CLAUDE.md index b6316bb..7d1a643 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,3 +29,11 @@ - Shared components are in `resources/js/Components/shared/` - Use the wrapper components (Button, Modal, Badge, etc.)
instead of native elements for consistency - Dark mode is supported via the `useColorMode` composable + +## CLI Tools + +### mcp-cli +- Installed at `~/.local/bin/mcp-cli` — a lightweight CLI for testing and calling MCP servers +- Config: `~/.config/mcp/mcp_servers.json` +- Usage: `mcp-cli` (list all), `mcp-cli info <server>` (details), `mcp-cli call <server> <tool> '<json>'` (call a tool) +- Connected servers: `founder-mode`, `notion`, `vibe_kanban`, `plane` diff --git a/app/Agents/OpenCompanyAgent.php b/app/Agents/OpenCompanyAgent.php index f110420..510e0d9 100644 --- a/app/Agents/OpenCompanyAgent.php +++ b/app/Agents/OpenCompanyAgent.php @@ -11,6 +11,9 @@ use App\Models\TaskStep; use App\Models\User; use App\Services\AgentDocumentService; +use App\Services\Memory\ContextPruner; +use App\Services\Memory\PromptFrameBuilder; +use App\Services\Memory\ToolResultDeduplicator; use Laravel\Ai\Contracts\Agent; use Laravel\Ai\Contracts\Conversational; use Laravel\Ai\Contracts\HasTools; @@ -21,12 +24,18 @@ use Laravel\Ai\Responses\Data\ToolCall; use Laravel\Ai\Responses\Data\ToolResult; use Illuminate\Support\Str; +use OpenCompany\PrismRelay\Contracts\HasSystemPrompts; #[MaxTokens(16_384)] -class OpenCompanyAgent implements Agent, HasTools, Conversational +class OpenCompanyAgent implements Agent, HasTools, Conversational, HasSystemPrompts { use Promptable; + /** + * @var array|null + */ + private ?array $promptFrameCache = null; + /** @var array */ private array $resolvedProvider; @@ -39,6 +48,9 @@ public function __construct( private ChannelConversationLoader $conversationLoader, private DynamicProviderResolver $providerResolver, private ToolRegistry $toolRegistry, + private PromptFrameBuilder $promptFrameBuilder, + private ToolResultDeduplicator $toolResultDeduplicator, + private ContextPruner $contextPruner, private ?string $taskId = null, ) { $this->resolvedProvider = $this->providerResolver->resolve($this->agent); @@ -79,11 +91,51 @@ public function resumeFrom(string $taskId): static /** * Get the
instructions (system prompt) for this agent. * - * Assembles from identity files in the same order as AgentChatService. + * Returns the full concatenated prompt (stable + volatile). When a + * SystemPromptBag is bound, CachingPrismGateway uses the split prompts + * from the bag instead for cache-friendly framing. */ public function instructions(): string { - return implode('', array_column($this->buildSections(), 'content')); + return $this->promptFrame()['full_prompt']; + } + + /** + * Get the full instruction set before stable/volatile splitting. + */ + public function fullInstructions(): string + { + return $this->promptFrame()['full_prompt']; + } + + /** + * Get the volatile runtime context that should travel with the user prompt. + */ + public function volatilePromptContext(): string + { + return $this->promptFrame()['volatile_prompt']; + } + + /** + * Runtime context now travels as additional system prompts via the gateway, + * so the user prompt should remain unchanged. + */ + public function preparePrompt(string $prompt): string + { + return $prompt; + } + + /** + * @return string[] + */ + public function systemPrompts(): array + { + $frame = $this->promptFrame(); + + return array_values(array_filter([ + trim($frame['stable_prompt']), + trim($frame['volatile_prompt']), + ], fn (string $prompt) => $prompt !== '')); } /** @@ -94,10 +146,27 @@ public function instructions(): string */ public function instructionsBreakdown(): array { - return array_values(array_map( - fn (array $s) => ['label' => $s['label'], 'chars' => mb_strlen($s['content'])], - $this->buildSections(), - )); + return $this->promptFrame()['stable_breakdown']; + } + + /** + * @return array + */ + public function volatileInstructionsBreakdown(): array + { + return $this->promptFrame()['volatile_breakdown']; + } + + /** + * @return array + */ + public function promptFrame(): array + { + if ($this->promptFrameCache !== null) { + return $this->promptFrameCache; + } + + return 
$this->promptFrameCache = $this->promptFrameBuilder->splitSections($this->buildSections()); } /** @@ -228,7 +297,7 @@ private function injectPeerCards(array &$sections, Channel $channel): void */ public function messages(): iterable { - $messages = $this->conversationLoader->load($this->channelId, $this->agent, $this->instructions()); + $messages = $this->conversationLoader->load($this->channelId, $this->agent, $this->fullInstructions()); if ($this->resumeFromTaskId) { $messages = $this->injectCheckpointedSteps($messages); @@ -289,7 +358,9 @@ private function injectCheckpointedSteps(iterable $messages): array ); } - return $messages; + $deduplicated = $this->toolResultDeduplicator->deduplicate($messages)['messages']; + + return $this->contextPruner->prune($deduplicated)['messages']; } /** diff --git a/app/Agents/Providers/CodexPrismGateway.php b/app/Agents/Providers/CodexPrismGateway.php index 5fb4f61..33b6ae1 100644 --- a/app/Agents/Providers/CodexPrismGateway.php +++ b/app/Agents/Providers/CodexPrismGateway.php @@ -5,18 +5,16 @@ use Illuminate\Contracts\Events\Dispatcher; use Illuminate\Http\Client\ConnectionException; use Illuminate\Http\Client\RequestException; -use Laravel\Ai\Gateway\Prism\PrismGateway; use Laravel\Ai\Gateway\TextGenerationOptions; use Laravel\Ai\Providers\Provider; +use OpenCompany\PrismRelay\Bridge\CachingPrismGateway; /** - * Custom PrismGateway that routes Codex requests to the registered 'codex' Prism provider. + * Custom gateway that routes Codex requests to the registered 'codex' Prism provider. * - * The Codex provider extends OpenAI and uses the same Responses API format, but routes - * requests through chatgpt.com/backend-api/codex/ using OAuth tokens from a ChatGPT - * Pro/Plus subscription — $0 token costs. + * Extends CachingPrismGateway for prompt cache support. 
*/ -class CodexPrismGateway extends PrismGateway +class CodexPrismGateway extends CachingPrismGateway { public function __construct(Dispatcher $events) { diff --git a/app/Agents/Providers/GlmPrismGateway.php b/app/Agents/Providers/GlmPrismGateway.php index 2c7b6e5..916ec3a 100644 --- a/app/Agents/Providers/GlmPrismGateway.php +++ b/app/Agents/Providers/GlmPrismGateway.php @@ -5,19 +5,17 @@ use Illuminate\Contracts\Events\Dispatcher; use Illuminate\Http\Client\ConnectionException; use Illuminate\Http\Client\RequestException; -use Laravel\Ai\Gateway\Prism\PrismGateway; use Laravel\Ai\Gateway\TextGenerationOptions; use Laravel\Ai\Providers\Provider; +use OpenCompany\PrismRelay\Bridge\CachingPrismGateway; /** - * Custom PrismGateway that routes requests to custom Prism providers + * Custom gateway that routes requests to custom Prism providers * registered via PrismManager::extend() (GLM, Kimi, MiniMax, etc.). * - * The base PrismGateway maps driver names to PrismProvider enums, which only - * works for native Prism providers. Custom providers need their string key - * passed directly to Prism's using() method. + * Extends CachingPrismGateway for prompt cache support on all providers. 
*/ -class GlmPrismGateway extends PrismGateway +class GlmPrismGateway extends CachingPrismGateway { public function __construct(Dispatcher $events) { diff --git a/app/Agents/Tools/ToolRegistry.php b/app/Agents/Tools/ToolRegistry.php index ecb9777..d76cc9d 100644 --- a/app/Agents/Tools/ToolRegistry.php +++ b/app/Agents/Tools/ToolRegistry.php @@ -7,7 +7,6 @@ use App\Models\AppSetting; use App\Models\User; use App\Services\AgentPermissionService; -use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; class ToolRegistry { @@ -47,7 +46,6 @@ class ToolRegistry public function __construct( private AgentPermissionService $permissionService, - private ToolProviderRegistry $providerRegistry, ) {} /** @@ -94,7 +92,7 @@ private function getEffectiveToolMap(): array } // External integration providers - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { foreach ($provider->tools() as $slug => $meta) { $this->effectiveToolMap[$slug] = $meta; } @@ -121,7 +119,7 @@ private function getEffectiveAppGroups(): array } // External integration providers - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { $meta = $provider->appMeta(); $this->effectiveAppGroups[$provider->appName()] = [ 'tools' => array_keys($provider->tools()), @@ -139,7 +137,7 @@ public function getEffectiveIntegrationApps(): array { if ($this->effectiveIntegrationApps === null) { $this->effectiveIntegrationApps = self::INTEGRATION_APPS; - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { if ($provider->isIntegration() && ! 
in_array($provider->appName(), $this->effectiveIntegrationApps)) { $this->effectiveIntegrationApps[] = $provider->appName(); } @@ -161,7 +159,7 @@ private function getEffectiveAppIcons(): array } // External integration providers - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { $meta = $provider->appMeta(); $this->effectiveAppIcons[$provider->appName()] = $meta['icon']; } @@ -175,7 +173,7 @@ private function getEffectiveIntegrationLogos(): array { if ($this->effectiveIntegrationLogos === null) { $this->effectiveIntegrationLogos = []; - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { $meta = $provider->appMeta(); if (isset($meta['logo'])) { $this->effectiveIntegrationLogos[$provider->appName()] = $meta['logo']; @@ -218,6 +216,11 @@ public function getToolMetaBySlug(string $slug): array ]; } + public function getToolTypeBySlug(string $slug): ?string + { + return $this->getEffectiveToolMap()[$slug]['type'] ?? null; + } + // ─── Tool filtering and instantiation ────────────────────────────────── /** @@ -582,7 +585,7 @@ private function instantiateTool(string $class, User $agent, string $slug = ''): ]; // Check external integration providers first - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { foreach ($provider->tools() as $toolSlug => $meta) { if ($meta['class'] === $class && ($slug === '' || $toolSlug === $slug)) { return $provider->createTool($class, [ @@ -630,4 +633,18 @@ private function buildAppLookup(): array return $lookup; } + + /** + * @return array + */ + private function integrationProviders(): array + { + $registryClass = \OpenCompany\IntegrationCore\Support\ToolProviderRegistry::class; + + if (! class_exists($registryClass) || ! 
app()->bound($registryClass)) { + return []; + } + + return app($registryClass)->all(); + } } diff --git a/app/Jobs/AgentRespondJob.php b/app/Jobs/AgentRespondJob.php index 63f2ace..e617302 100644 --- a/app/Jobs/AgentRespondJob.php +++ b/app/Jobs/AgentRespondJob.php @@ -22,11 +22,13 @@ use App\Jobs\Concerns\SetsWorkspaceContext; use App\Services\AgentCommunicationService; use App\Services\AgentDocumentService; +use App\Services\Memory\ContextBudget; use App\Services\Memory\ModelContextRegistry; use App\Services\TelegramService; use Laravel\Ai\Responses\AgentResponse; use Laravel\Ai\Responses\Data\FinishReason; use Illuminate\Support\Facades\Log; +use OpenCompany\PrismRelay\Bridge\SystemPromptBag; use Illuminate\Support\Str; class AgentRespondJob implements ShouldQueue, ShouldBeUnique @@ -197,13 +199,24 @@ public function handle(): void $agentInstance->resumeFrom($task->id); } + $currentMessages = []; + // Capture LLM context before prompting (for observability) try { $toolRegistry = app(\App\Agents\Tools\ToolRegistry::class); + $promptFrame = $agentInstance->promptFrame(); + $currentMessages = $agentInstance->messages(); + $contextBudget = app(ContextBudget::class)->snapshotForAgent( + $this->agent, + $currentMessages, + $agentInstance->fullInstructions(), + ); $task->update([ 'context' => [ 'system_prompt' => $agentInstance->instructions(), - 'messages' => collect($agentInstance->messages()) /** @phpstan-ignore argument.templateType */ + 'full_system_prompt' => $agentInstance->fullInstructions(), + 'volatile_prompt_context' => $agentInstance->volatilePromptContext(), + 'messages' => collect($currentMessages) /** @phpstan-ignore argument.templateType */ ->map(fn ($m) => [ 'role' => $m->role->value, 'content' => Str::limit($m->content ?? 
'', 2000), @@ -211,9 +224,11 @@ public function handle(): void 'tools' => $toolRegistry->getToolSlugsForAgent($this->agent), 'model' => $agentInstance->model(), 'provider' => $agentInstance->provider(), - 'prompt_sections' => $agentInstance->instructionsBreakdown(), + 'prompt_sections' => $promptFrame['stable_breakdown'], + 'volatile_prompt_sections' => $promptFrame['volatile_breakdown'], 'context_window' => app(ModelContextRegistry::class) - ->getContextWindow($agentInstance->model()), + ->getContextWindow($agentInstance->model(), $agentInstance->provider()), + 'context_budget' => $contextBudget, ], ]); } catch (\Throwable $e) { @@ -223,8 +238,7 @@ public function handle(): void // Memory flush: save important context to LTM before compaction try { $flushService = app(\App\Services\Memory\MemoryFlushService::class); - $currentMessages = $agentInstance->messages(); - if ($flushService->shouldFlush($this->channelId, $this->agent, $currentMessages, $agentInstance->instructions())) { + if ($flushService->shouldFlush($this->channelId, $this->agent, $currentMessages, $agentInstance->fullInstructions())) { $flushStep = $task->addStep('Flushing memories before compaction', 'action'); $flushStep->start(); $flushService->flush($this->channelId, $this->agent); @@ -235,7 +249,12 @@ public function handle(): void } $llmStep->start(); - $response = $agentInstance->prompt($this->buildPromptWithThreadContext($this->userMessage)); + app()->instance(SystemPromptBag::class, new SystemPromptBag( + $agentInstance->systemPrompts() + )); + $response = $agentInstance->prompt( + $this->buildPromptWithThreadContext($this->userMessage) + ); $lastStep = $response->steps->last(); @@ -375,17 +394,20 @@ public function handle(): void $outputReserve = (int) config('memory.compaction.output_reserve', 4_096); $systemChars = mb_strlen($task->context['system_prompt'] ?? ''); + $volatileChars = mb_strlen($task->context['volatile_prompt_context'] ?? 
''); $messageChars = array_sum(array_map( fn ($m) => mb_strlen($m['content'] ?? ''), $task->context['messages'] ?? [], )); - $totalChars = $systemChars + $messageChars; + $totalChars = $systemChars + $volatileChars + $messageChars; $systemRatio = $totalChars > 0 ? $systemChars / $totalChars : 0.5; + $volatileRatio = $totalChars > 0 ? $volatileChars / $totalChars : 0.0; $systemTokens = (int) round($lastStepPromptTokens * $systemRatio); - $messageTokens = $lastStepPromptTokens - $systemTokens; + $volatileTokens = (int) round($lastStepPromptTokens * $volatileRatio); + $messageTokens = max(0, $lastStepPromptTokens - $systemTokens - $volatileTokens); - $available = max(0, $contextWindow - $systemTokens - $outputReserve); + $available = max(0, $contextWindow - $systemTokens - $volatileTokens - $outputReserve); $thresholdRatio = (float) config('memory.compaction.threshold_ratio', 0.75); $safetyMargin = (float) config('memory.compaction.safety_margin', 1.2); $compactionThreshold = (int) ($available * $thresholdRatio); @@ -403,6 +425,10 @@ public function handle(): void 'total' => $systemTokens, 'sections' => $context['prompt_sections'] ?? [], ], + 'volatile_prompt_context' => [ + 'total' => $volatileTokens, + 'sections' => $context['volatile_prompt_sections'] ?? [], + ], 'messages' => [ 'total' => $messageTokens, 'count' => count($context['messages'] ?? []), @@ -418,7 +444,7 @@ public function handle(): void 'last_step_prompt_tokens' => $lastStepPromptTokens, 'finish_reason' => $lastStep?->finishReason->value ?? 
'unknown', ]; - unset($context['prompt_sections'], $context['context_window']); + unset($context['prompt_sections'], $context['volatile_prompt_sections'], $context['context_window']); $task->update(['context' => $context]); } catch (\Throwable $e) { Log::warning('Post-delivery bookkeeping failed', ['error' => $e->getMessage(), 'task' => $task->id]); diff --git a/app/Jobs/ExecuteAgentTaskJob.php b/app/Jobs/ExecuteAgentTaskJob.php index 2cb163a..85e6d25 100644 --- a/app/Jobs/ExecuteAgentTaskJob.php +++ b/app/Jobs/ExecuteAgentTaskJob.php @@ -16,6 +16,7 @@ use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; use Illuminate\Support\Facades\Log; +use OpenCompany\PrismRelay\Bridge\SystemPromptBag; class ExecuteAgentTaskJob implements ShouldQueue { @@ -64,6 +65,9 @@ public function handle(): void $agentInstance = OpenCompanyAgent::for($agent, $channelId, $this->task->id); $analyzeStep->start(); + app()->instance(SystemPromptBag::class, new SystemPromptBag( + $agentInstance->systemPrompts() + )); $response = $agentInstance->prompt($prompt); $analyzeStep->complete(); diff --git a/app/Jobs/RunAutomationJob.php b/app/Jobs/RunAutomationJob.php index abbb243..10e189d 100644 --- a/app/Jobs/RunAutomationJob.php +++ b/app/Jobs/RunAutomationJob.php @@ -21,6 +21,7 @@ use Illuminate\Queue\SerializesModels; use Illuminate\Support\Facades\Log; use Illuminate\Support\Str; +use OpenCompany\PrismRelay\Bridge\SystemPromptBag; class RunAutomationJob implements ShouldQueue, ShouldBeUnique { @@ -127,12 +128,17 @@ public function handle(): void // Capture LLM context for observability try { $toolRegistry = app(\App\Agents\Tools\ToolRegistry::class); + $promptFrame = $agentInstance->promptFrame(); $task->update([ 'context' => array_merge($task->context ?? 
[], [ + 'system_prompt' => $agentInstance->instructions(), + 'full_system_prompt' => $agentInstance->fullInstructions(), + 'volatile_prompt_context' => $agentInstance->volatilePromptContext(), 'tools' => $toolRegistry->getToolSlugsForAgent($agent), 'model' => $agentInstance->model(), 'provider' => $agentInstance->provider(), - 'prompt_sections' => $agentInstance->instructionsBreakdown(), + 'prompt_sections' => $promptFrame['stable_breakdown'], + 'volatile_prompt_sections' => $promptFrame['volatile_breakdown'], ]), ]); } catch (\Throwable $e) { @@ -141,6 +147,9 @@ public function handle(): void $prompt = $this->buildScheduledPrompt(); $generationStartedAt = now(); + app()->instance(SystemPromptBag::class, new SystemPromptBag( + $agentInstance->systemPrompts() + )); $response = $agentInstance->prompt($prompt); $generationCompletedAt = now(); diff --git a/app/Listeners/CheckpointToolCall.php b/app/Listeners/CheckpointToolCall.php index 900b802..3f4e706 100644 --- a/app/Listeners/CheckpointToolCall.php +++ b/app/Listeners/CheckpointToolCall.php @@ -5,6 +5,7 @@ use App\Agents\OpenCompanyAgent; use App\Models\Task; use App\Models\User; +use App\Services\Memory\OutputTruncator; use App\Support\LuaMetaParser; use Laravel\Ai\Events\ToolInvoked; use App\Agents\Tools\ToolRegistry; @@ -49,10 +50,9 @@ public function handle(ToolInvoked $event): void $luaMeta = $extracted['meta']; $result = $extracted['result']; - // Truncate large string results to prevent DB bloat - if (is_string($result) && strlen($result) > 2000) { - $result = mb_strcut($result, 0, 2000, 'UTF-8') . '... [truncated]'; - } + // Truncate large results before checkpoint persistence to keep + // retry context lean while preserving the full payload durably. 
+ $result = app(OutputTruncator::class)->truncate($result, $event->toolInvocationId); // Sanitize to valid UTF-8 to prevent JSON encoding failures if (is_string($result)) { diff --git a/app/Models/ConversationSummary.php b/app/Models/ConversationSummary.php index a2bcdf9..d66e366 100644 --- a/app/Models/ConversationSummary.php +++ b/app/Models/ConversationSummary.php @@ -23,6 +23,15 @@ class ConversationSummary extends Model 'flush_count', 'messages_summarized', 'last_message_id', + 'compaction_failure_count', + 'last_compaction_failed_at', + 'compaction_circuit_open_until', + 'last_compaction_error', + ]; + + protected $casts = [ + 'last_compaction_failed_at' => 'datetime', + 'compaction_circuit_open_until' => 'datetime', ]; /** @return BelongsTo */ diff --git a/app/Providers/AppServiceProvider.php b/app/Providers/AppServiceProvider.php index a9f8846..ce528c6 100644 --- a/app/Providers/AppServiceProvider.php +++ b/app/Providers/AppServiceProvider.php @@ -20,7 +20,18 @@ use Illuminate\Support\Facades\Vite; use Illuminate\Support\ServiceProvider; use Laravel\Ai\AiManager; +use Laravel\Ai\Providers\AnthropicProvider; +use Laravel\Ai\Providers\AzureOpenAiProvider; +use Laravel\Ai\Providers\DeepSeekProvider; +use Laravel\Ai\Providers\GeminiProvider; +use Laravel\Ai\Providers\GroqProvider; +use Laravel\Ai\Providers\MistralProvider; +use Laravel\Ai\Providers\OllamaProvider; use Laravel\Ai\Providers\OpenAiProvider; +use Laravel\Ai\Providers\OpenRouterProvider; +use Laravel\Ai\Providers\VoyageAiProvider; +use Laravel\Ai\Providers\XaiProvider; +use OpenCompany\PrismRelay\Bridge\CachingPrismGateway; use Prism\Prism\PrismManager; class AppServiceProvider extends ServiceProvider @@ -83,18 +94,39 @@ public function boot(): void // Custom Prism providers (GLM, Kimi, MiniMax) are registered by // PrismRelayServiceProvider via afterResolving(PrismManager::class). - // Register 'glm' and 'glm-coding' as custom AI SDK drivers. 
- // These use GlmPrismGateway which routes to our custom 'glm' Prism provider - // (chat/completions) instead of the default OpenAI provider (/responses). + // Override all AI SDK drivers to use CachingPrismGateway for provider-aware + // prompt caching (Anthropic ephemeral, Gemini dedicated, OpenAI auto). // Use afterResolving because AiManager is scoped (recreated per job in queue workers). $this->app->afterResolving(AiManager::class, function (AiManager $aiManager, $app) { - $createGlmDriver = function ($app, array $config) { - return new OpenAiProvider( - new GlmPrismGateway($app['events']), - $config, - $app->make(Dispatcher::class) - ); - }; + $gateway = new CachingPrismGateway($app['events']); + $dispatcher = $app->make(Dispatcher::class); + + // Standard drivers — replace PrismGateway with CachingPrismGateway + $standardDrivers = [ + 'anthropic' => AnthropicProvider::class, + 'azure' => AzureOpenAiProvider::class, + 'deepseek' => DeepSeekProvider::class, + 'gemini' => GeminiProvider::class, + 'groq' => GroqProvider::class, + 'mistral' => MistralProvider::class, + 'ollama' => OllamaProvider::class, + 'openai' => OpenAiProvider::class, + 'openrouter' => OpenRouterProvider::class, + 'voyageai' => VoyageAiProvider::class, + 'xai' => XaiProvider::class, + ]; + + foreach ($standardDrivers as $driver => $providerClass) { + $aiManager->extend($driver, fn ($app, array $config) => new $providerClass( + $gateway, $config, $dispatcher, + )); + } + + // GLM-family drivers — use GlmPrismGateway (extends CachingPrismGateway) + $glmGateway = new GlmPrismGateway($app['events']); + $createGlmDriver = fn ($app, array $config) => new OpenAiProvider( + $glmGateway, $config, $dispatcher, + ); $aiManager->extend('glm', $createGlmDriver); $aiManager->extend('glm-coding', $createGlmDriver); @@ -103,14 +135,10 @@ public function boot(): void $aiManager->extend('minimax', $createGlmDriver); $aiManager->extend('minimax-cn', $createGlmDriver); - // Register Codex driver (ChatGPT 
subscription via OAuth) - $aiManager->extend('codex', function ($app, array $config) { - return new OpenAiProvider( - new CodexPrismGateway($app['events']), - $config, - $app->make(Dispatcher::class) - ); - }); + // Codex driver (ChatGPT subscription via OAuth) + $aiManager->extend('codex', fn ($app, array $config) => new OpenAiProvider( + new CodexPrismGateway($app['events']), $config, $dispatcher, + )); }); } diff --git a/app/Services/AgentPermissionService.php b/app/Services/AgentPermissionService.php index f48ac0c..2d94e90 100644 --- a/app/Services/AgentPermissionService.php +++ b/app/Services/AgentPermissionService.php @@ -8,7 +8,6 @@ use App\Models\User; use App\Models\WorkspaceFile; use Illuminate\Support\Str; -use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; class AgentPermissionService { @@ -28,9 +27,7 @@ class AgentPermissionService 'contact_agent', ]; - public function __construct( - private ToolProviderRegistry $providerRegistry, - ) {} + public function __construct() {} /** * Resolve the final permission for a tool, combining DB permissions with behavior mode. * @@ -156,7 +153,7 @@ public function getEnabledIntegrations(User $agent): array // Build full list of all integration app names $allApps = \App\Agents\Tools\ToolRegistry::INTEGRATION_APPS; - foreach ($this->providerRegistry->all() as $provider) { + foreach ($this->integrationProviders() as $provider) { if ($provider->isIntegration() && !in_array($provider->appName(), $allApps)) { $allApps[] = $provider->appName(); } @@ -402,4 +399,18 @@ private function behaviorModeRequiresApproval(User $agent, string $toolType): bo default => false, }; } + + /** + * @return array + */ + private function integrationProviders(): array + { + $registryClass = \OpenCompany\IntegrationCore\Support\ToolProviderRegistry::class; + + if (! class_exists($registryClass) || ! 
app()->bound($registryClass)) { + return []; + } + + return app($registryClass)->all(); + } } diff --git a/app/Services/LuaApiDocGenerator.php b/app/Services/LuaApiDocGenerator.php index f9bc19c..9e64203 100644 --- a/app/Services/LuaApiDocGenerator.php +++ b/app/Services/LuaApiDocGenerator.php @@ -4,9 +4,6 @@ use App\Agents\Tools\ToolRegistry; use App\Models\User; -use OpenCompany\IntegrationCore\Lua\LuaCatalogBuilder; -use OpenCompany\IntegrationCore\Lua\LuaDocRenderer; -use OpenCompany\IntegrationCore\Support\ToolProviderRegistry; class LuaApiDocGenerator { @@ -17,14 +14,17 @@ class LuaApiDocGenerator public function __construct( private ToolRegistry $registry, - private ToolProviderRegistry $providerRegistry, - private LuaCatalogBuilder $catalogBuilder, - private LuaDocRenderer $docRenderer, ) {} public function generateNamespaceIndex(User $agent, ?string $filterNamespace = null): string { - return $this->docRenderer->generateNamespaceIndex( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return $this->getNamespaceSummary($agent); + } + + return $renderer->generateNamespaceIndex( $this->buildNamespaces($agent), $this->getStaticPageContents(), $filterNamespace, @@ -33,7 +33,13 @@ public function generateNamespaceIndex(User $agent, ?string $filterNamespace = n public function generateNamespaceDocs(string $namespace, User $agent): string { - return $this->docRenderer->generateNamespaceDocs( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return $this->getProviderLuaDocs($namespace) ?? 
"No Lua docs available for namespace '{$namespace}'."; + } + + return $renderer->generateNamespaceDocs( $namespace, $this->buildNamespaces($agent), fn (string $ns) => $this->getProviderLuaDocs($ns), @@ -42,7 +48,13 @@ public function generateNamespaceDocs(string $namespace, User $agent): string public function generateFunctionDocs(string $namespace, string $function, User $agent): string { - return $this->docRenderer->generateFunctionDocs( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return "Lua docs renderer unavailable for {$namespace}.{$function}."; + } + + return $renderer->generateFunctionDocs( $namespace, $function, $this->buildNamespaces($agent), @@ -51,7 +63,13 @@ public function generateFunctionDocs(string $namespace, string $function, User $ public function search(string $query, User $agent, int $limit = 10): string { - return $this->docRenderer->search( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return $this->getNamespaceSummary($agent); + } + + return $renderer->search( $query, $this->buildNamespaces($agent), $this->getStaticPageContents(), @@ -68,10 +86,14 @@ private function buildNamespaces(User $agent): array return $this->cachedNamespaces; } - $this->cachedNamespaces = $this->catalogBuilder->buildNamespaces( - $this->registry->getToolCatalog($agent), - ['tasks', 'system', 'lua'], - ); + $builder = $this->catalogBuilder(); + + $this->cachedNamespaces = $builder !== null + ? $builder->buildNamespaces( + $this->registry->getToolCatalog($agent), + ['tasks', 'system', 'lua'], + ) + : []; $this->cachedAgent = $agent; return $this->cachedNamespaces; @@ -82,7 +104,11 @@ private function buildNamespaces(User $agent): array */ public function buildFunctionMap(User $agent): array { - return $this->catalogBuilder->buildFunctionMap($this->buildNamespaces($agent)); + $builder = $this->catalogBuilder(); + + return $builder !== null + ? 
$builder->buildFunctionMap($this->buildNamespaces($agent)) + : []; } /** @@ -90,7 +116,11 @@ public function buildFunctionMap(User $agent): array */ public function buildParameterMap(User $agent): array { - return $this->catalogBuilder->buildParameterMap($this->buildNamespaces($agent)); + $builder = $this->catalogBuilder(); + + return $builder !== null + ? $builder->buildParameterMap($this->buildNamespaces($agent)) + : []; } /** @@ -98,7 +128,13 @@ public function buildParameterMap(User $agent): array */ public function getAvailablePages(User $agent): array { - return $this->docRenderer->getAvailablePages( + $renderer = $this->docRenderer(); + + if ($renderer === null) { + return array_keys($this->getStaticPageContents()); + } + + return $renderer->getAvailablePages( $this->buildNamespaces($agent), $this->getStaticPageContents(), ); @@ -110,11 +146,17 @@ public function getAvailablePages(User $agent): array */ private function getProviderLuaDocs(string $namespace): ?string { + $providerRegistry = $this->providerRegistry(); + + if ($providerRegistry === null) { + return null; + } + $appName = str_starts_with($namespace, 'integrations.') ? substr($namespace, strlen('integrations.')) : $namespace; - $provider = $this->providerRegistry->get($appName); + $provider = $providerRegistry->get($appName); if ($provider === null) { return null; } @@ -173,7 +215,19 @@ public function getStaticDocsForCatalog(): array public function getNamespaceSummary(User $agent): string { - return $this->docRenderer->getNamespaceSummary($this->buildNamespaces($agent)); + $renderer = $this->docRenderer(); + + if ($renderer === null) { + $namespaces = array_keys($this->buildNamespaces($agent)); + + if ($namespaces === []) { + return 'No external Lua API namespaces are available in this workspace.'; + } + + return "Available Lua namespaces:\n- " . 
implode("\n- ", $namespaces); + } + + return $renderer->getNamespaceSummary($this->buildNamespaces($agent)); } /** @@ -231,6 +285,43 @@ public function readStaticPage(string $slug): ?string */ private function deriveFunctionName(string $toolName, string $appName): string { - return $this->catalogBuilder->deriveFunctionName($toolName, $appName); + $builder = $this->catalogBuilder(); + + return $builder !== null + ? $builder->deriveFunctionName($toolName, $appName) + : $toolName; + } + + private function providerRegistry(): ?object + { + $class = \OpenCompany\IntegrationCore\Support\ToolProviderRegistry::class; + + if (! class_exists($class) || ! app()->bound($class)) { + return null; + } + + return app($class); + } + + private function catalogBuilder(): ?object + { + $class = \OpenCompany\IntegrationCore\Lua\LuaCatalogBuilder::class; + + if (! class_exists($class) || ! app()->bound($class)) { + return null; + } + + return app($class); + } + + private function docRenderer(): ?object + { + $class = \OpenCompany\IntegrationCore\Lua\LuaDocRenderer::class; + + if (! class_exists($class) || ! app()->bound($class)) { + return null; + } + + return app($class); } } diff --git a/app/Services/LuaSandboxService.php b/app/Services/LuaSandboxService.php index 7089ca7..b8962e4 100644 --- a/app/Services/LuaSandboxService.php +++ b/app/Services/LuaSandboxService.php @@ -36,6 +36,8 @@ public function execute(string $code, array $options = [], ?LuaBridge $bridge = $sandbox->load("{$name} = " . $this->phpToLua($value))->call(); } + $this->registerJsonGlobals($sandbox); + $start = microtime(true); try { @@ -224,4 +226,90 @@ private function setupAppNamespace(Sandbox $sandbox, LuaBridge $bridge): void app = make_namespace("") ')->call(); } + + /** + * Register `json.decode()`, `json.encode()`, and `regex.*` as Lua globals. + * + * JSON bridges PHP's json_decode/json_encode so Lua scripts can parse + * JSON strings. 
Regex bridges PHP's PCRE for patterns Lua's built-in + * matching doesn't support (lookaheads, non-greedy, Unicode, etc.). + */ + private function registerJsonGlobals(Sandbox $sandbox): void + { + $sandbox->register('__json', [ + 'decode' => function (string $json): mixed { + return json_decode($json, associative: true, depth: 512, flags: JSON_THROW_ON_ERROR); + }, + 'encode' => function (mixed $value): string { + return json_encode($value, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT); + }, + ]); + + $sandbox->register('__regex', [ + 'match' => function (string $subject, string $pattern, int $flags = 0): mixed { + if (preg_match($pattern, $subject, $matches, $flags) === 1) { + return $matches; + } + + return null; + }, + 'match_all' => function (string $subject, string $pattern, int $flags = PREG_PATTERN_ORDER): array { + if (preg_match_all($pattern, $subject, $matches, $flags) > 0) { + return $matches; + } + + return []; + }, + 'gsub' => function (string $subject, string $pattern, string $replacement, int $limit = -1): string { + return preg_replace($pattern, $replacement, $subject, $limit) ?? $subject; + }, + ]); + + $sandbox->load(' + json = { + decode = function(s) + if type(s) ~= "string" then + error("json.decode: expected string, got " .. type(s), 2) + end + return __json.decode(s) + end, + encode = function(v) + return __json.encode(v) + end + } + + regex = { + match = function(subject, pattern, flags) + if type(subject) ~= "string" then + error("regex.match: expected string subject, got " .. type(subject), 2) + end + if type(pattern) ~= "string" then + error("regex.match: expected string pattern, got " .. type(pattern), 2) + end + return __regex.match(subject, pattern, flags or 0) + end, + match_all = function(subject, pattern, flags) + if type(subject) ~= "string" then + error("regex.match_all: expected string subject, got " .. 
type(subject), 2) + end + if type(pattern) ~= "string" then + error("regex.match_all: expected string pattern, got " .. type(pattern), 2) + end + return __regex.match_all(subject, pattern, flags or 0) + end, + gsub = function(subject, pattern, replacement, limit) + if type(subject) ~= "string" then + error("regex.gsub: expected string subject, got " .. type(subject), 2) + end + if type(pattern) ~= "string" then + error("regex.gsub: expected string pattern, got " .. type(pattern), 2) + end + if type(replacement) ~= "string" then + error("regex.gsub: expected string replacement, got " .. type(replacement), 2) + end + return __regex.gsub(subject, pattern, replacement, limit or -1) + end, + } + ')->call(); + } } diff --git a/app/Services/Memory/CompactionMemoryExtractor.php b/app/Services/Memory/CompactionMemoryExtractor.php new file mode 100644 index 0000000..ff2a8ba --- /dev/null +++ b/app/Services/Memory/CompactionMemoryExtractor.php @@ -0,0 +1,41 @@ +extractSectionBullets($summary, 'Durable Facts'), + $this->extractSectionBullets($summary, 'Decisions'), + ); + + return array_values(array_unique(array_filter(array_map( + fn (string $item): string => trim(preg_replace('/\s+/', ' ', $item) ?? ''), + $items, + )))); + } + + /** + * @return string[] + */ + private function extractSectionBullets(string $summary, string $heading): array + { + $pattern = sprintf( + '/^##\s+%s\s*$([\s\S]*?)(?=^##\s+|\z)/mi', + preg_quote($heading, '/'), + ); + + if (! preg_match($pattern, $summary, $matches)) { + return []; + } + + preg_match_all('/^\-\s+(.*)$/m', trim($matches[1]), $bullets); + + return $bullets[1] ?? 
[]; + } +} diff --git a/app/Services/Memory/CompactionPlan.php b/app/Services/Memory/CompactionPlan.php new file mode 100644 index 0000000..9efa40c --- /dev/null +++ b/app/Services/Memory/CompactionPlan.php @@ -0,0 +1,25 @@ + $messagesToSummarize + * @param Collection $messagesToKeep + */ + public function __construct( + public readonly Collection $messagesToSummarize, + public readonly Collection $messagesToKeep, + public readonly int $splitIndex, + public readonly int $tokensToSummarize, + public readonly int $tokensToKeep, + ) {} + + public function lastSummarizedMessageId(): ?string + { + return $this->messagesToSummarize->last()?->id; + } +} diff --git a/app/Services/Memory/ContextBudget.php b/app/Services/Memory/ContextBudget.php new file mode 100644 index 0000000..0d1cad8 --- /dev/null +++ b/app/Services/Memory/ContextBudget.php @@ -0,0 +1,92 @@ + $messages + * @return array + */ + public function snapshotForAgent(User $agent, iterable $messages, ?string $systemPrompt = null): array + { + $resolved = $this->providerResolver->resolve($agent); + + return $this->snapshot( + $resolved['provider'], + $resolved['model'], + $this->estimateMessagesTokens($messages), + $systemPrompt, + ); + } + + /** + * @return array + */ + public function snapshot( + string $provider, + string $model, + int $messageTokens, + ?string $systemPrompt = null, + ): array { + $contextWindow = $this->contextRegistry->getContextWindow($model, $provider); + $systemTokens = $systemPrompt !== null + ? 
TokenEstimator::estimate($systemPrompt) + : config('memory.compaction.system_prompt_fallback_reserve', 10_000); + $outputReserve = (int) config('memory.compaction.output_reserve', 4_096); + $effectiveWindow = max(1, $contextWindow - $systemTokens - $outputReserve); + $safetyMargin = (float) config('memory.compaction.safety_margin', 1.2); + $adjustedMessageTokens = (int) ceil($messageTokens * $safetyMargin); + $warningThreshold = (int) floor($effectiveWindow * (float) config('memory.budget.warning_ratio', 0.65)); + $compactionThreshold = (int) floor($effectiveWindow * (float) config('memory.compaction.threshold_ratio', 0.75)); + $flushThreshold = max(1, $compactionThreshold - (int) config('memory.memory_flush.soft_threshold_tokens', 4_000)); + $blockingThreshold = max( + $compactionThreshold, + $effectiveWindow - (int) config('memory.budget.blocking_margin_tokens', 1_024), + ); + $percentLeft = max(0, (int) round((($effectiveWindow - $adjustedMessageTokens) / $effectiveWindow) * 100)); + + return [ + 'provider' => $provider, + 'model' => $model, + 'context_window' => $contextWindow, + 'system_tokens' => $systemTokens, + 'output_reserve' => $outputReserve, + 'effective_window' => $effectiveWindow, + 'raw_message_tokens' => $messageTokens, + 'adjusted_message_tokens' => $adjustedMessageTokens, + 'safety_margin' => $safetyMargin, + 'warning_threshold' => $warningThreshold, + 'flush_threshold' => $flushThreshold, + 'compaction_threshold' => $compactionThreshold, + 'blocking_threshold' => $blockingThreshold, + 'percent_left' => $percentLeft, + 'is_above_warning' => $adjustedMessageTokens >= $warningThreshold, + 'is_above_flush' => $adjustedMessageTokens >= $flushThreshold, + 'is_above_compaction' => $adjustedMessageTokens > $compactionThreshold, + 'is_at_blocking_limit' => $adjustedMessageTokens >= $blockingThreshold, + ]; + } + + /** + * @param iterable $messages + */ + public function estimateMessagesTokens(iterable $messages): int + { + $total = 0; + + foreach 
($messages as $message) { + $total += TokenEstimator::estimate((string) ($message->content ?? '')); + } + + return $total; + } +} diff --git a/app/Services/Memory/ContextPruner.php b/app/Services/Memory/ContextPruner.php new file mode 100644 index 0000000..94d633e --- /dev/null +++ b/app/Services/Memory/ContextPruner.php @@ -0,0 +1,143 @@ + $messages + * @return array{messages: array, pruned_results: int, estimated_tokens_saved: int} + */ + public function prune(array $messages): array + { + if (! config('memory.pruning.enabled', true)) { + return [ + 'messages' => $messages, + 'pruned_results' => 0, + 'estimated_tokens_saved' => 0, + ]; + } + + $candidates = []; + + foreach ($messages as $index => $message) { + if (! $message instanceof ToolResultMessage) { + continue; + } + + $savings = 0; + $eligible = true; + + foreach ($message->toolResults as $toolResult) { + $toolType = $this->toolRegistry->getToolTypeBySlug($toolResult->name); + + if ($toolType !== 'read') { + $eligible = false; + break; + } + + $serialized = $this->serializeResult($toolResult->result); + if ($serialized === null) { + $eligible = false; + break; + } + + $tokens = TokenEstimator::estimate($serialized); + if ($tokens < (int) config('memory.pruning.min_result_tokens', 400)) { + $eligible = false; + break; + } + + $placeholderTokens = TokenEstimator::estimate($this->placeholder($toolResult->name)); + $savings += max(0, $tokens - $placeholderTokens); + } + + if (! 
$eligible || $savings <= 0) { + continue; + } + + $candidates[] = [ + 'index' => $index, + 'tokens_saved' => $savings, + 'message' => $message, + ]; + } + + $keepRecent = (int) config('memory.pruning.keep_recent_read_results', 2); + if (count($candidates) <= $keepRecent) { + return [ + 'messages' => $messages, + 'pruned_results' => 0, + 'estimated_tokens_saved' => 0, + ]; + } + + $prunable = array_slice($candidates, 0, max(0, count($candidates) - $keepRecent)); + $tokensSaved = array_sum(array_column($prunable, 'tokens_saved')); + + if ($tokensSaved < (int) config('memory.pruning.min_total_saved_tokens', 1_000)) { + return [ + 'messages' => $messages, + 'pruned_results' => 0, + 'estimated_tokens_saved' => 0, + ]; + } + + $prunedResults = 0; + + foreach ($prunable as $candidate) { + /** @var ToolResultMessage $toolResultMessage */ + $toolResultMessage = $candidate['message']; + + $messages[$candidate['index']] = new ToolResultMessage( + $toolResultMessage->toolResults->map(function (ToolResult $toolResult) use (&$prunedResults) { + $prunedResults++; + + return new ToolResult( + id: $toolResult->id, + name: $toolResult->name, + arguments: $toolResult->arguments, + result: $this->placeholder($toolResult->name), + resultId: $toolResult->resultId, + ); + }) + ); + } + + return [ + 'messages' => $messages, + 'pruned_results' => $prunedResults, + 'estimated_tokens_saved' => $tokensSaved, + ]; + } + + private function placeholder(string $toolName): string + { + return "[Earlier {$toolName} read result omitted from retry context. Re-run the tool if you still need the full output.]"; + } + + private function serializeResult(mixed $result): ?string + { + if (is_string($result)) { + return trim($result) !== '' ? $result : null; + } + + if (is_array($result)) { + $encoded = json_encode($result, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); + + return $encoded !== false && $encoded !== '[]' ? 
$encoded : null; + } + + return null; + } +} diff --git a/app/Services/Memory/ConversationCompactionService.php b/app/Services/Memory/ConversationCompactionService.php index 923f102..a645878 100644 --- a/app/Services/Memory/ConversationCompactionService.php +++ b/app/Services/Memory/ConversationCompactionService.php @@ -7,7 +7,9 @@ use App\Models\ConversationSummary; use App\Models\Message; use App\Models\User; +use App\Services\AgentDocumentService; use Illuminate\Support\Facades\Log; +use Illuminate\Support\Str; use Laravel\Ai\Messages\AssistantMessage; use Laravel\Ai\Messages\UserMessage; use Prism\Prism\Facades\Prism; @@ -15,8 +17,11 @@ class ConversationCompactionService { public function __construct( - private ModelContextRegistry $contextRegistry, + private ContextBudget $contextBudget, private DynamicProviderResolver $providerResolver, + private CompactionMemoryExtractor $memoryExtractor, + private AgentDocumentService $documentService, + private DocumentIndexingService $documentIndexingService, ) {} /** @@ -33,27 +38,16 @@ public function needsCompaction(string $channelId, User $agent, iterable $messag } try { - $resolved = $this->providerResolver->resolve($agent); - $contextWindow = $this->contextRegistry->getContextWindow($resolved['model']); - } catch (\Throwable) { - return false; - } - - $systemTokens = $systemPrompt - ? 
$this->estimateTokenCount($systemPrompt) - : config('memory.compaction.system_prompt_fallback_reserve', 10_000); - $outputReserve = config('memory.compaction.output_reserve', 4_096); - $available = $contextWindow - $systemTokens - $outputReserve; + if ($this->isCircuitOpen($channelId, $agent)) { + return false; + } - if ($available <= 0) { + $budget = $this->contextBudget->snapshotForAgent($agent, $messages, $systemPrompt); + } catch (\Throwable) { return false; } - $messageTokens = $this->estimateMessagesTokens($messages); - $safetyMargin = config('memory.compaction.safety_margin', 1.2); - $threshold = $available * config('memory.compaction.threshold_ratio', 0.75); - - return ($messageTokens * $safetyMargin) > $threshold; + return (bool) $budget['is_above_compaction']; } /** @@ -65,6 +59,16 @@ public function compact(string $channelId, User $agent): ?ConversationSummary ->where('agent_id', $agent->id) ->first(); + if ($existing?->compaction_circuit_open_until?->isFuture()) { + Log::warning('Skipping compaction while circuit is open', [ + 'channel_id' => $channelId, + 'agent' => $agent->name, + 'open_until' => $existing->compaction_circuit_open_until?->toIso8601String(), + ]); + + return null; + } + // Only load messages after the previous compaction point $query = Message::where('channel_id', $channelId) ->orderBy('created_at', 'asc'); @@ -82,32 +86,16 @@ public function compact(string $channelId, User $agent): ?ConversationSummary return null; } - $keepRecentTokens = config('memory.compaction.keep_recent_tokens', 20_000); - $minKeep = config('memory.compaction.min_keep_messages', 3); - - // Walk from newest to oldest, accumulating tokens until budget is exceeded - $keptTokens = 0; - $splitIndex = 0; - for ($i = $messages->count() - 1; $i >= 0; $i--) { - $msgTokens = $this->estimateTokenCount($messages[$i]->content ?? 
''); - if ($keptTokens + $msgTokens > $keepRecentTokens - && ($messages->count() - $i - 1) >= $minKeep) { - $splitIndex = $i + 1; - break; - } - $keptTokens += $msgTokens; - } - - if ($splitIndex <= 0) { + $plan = $this->buildPlan($messages); + if ($plan === null) { return null; } - $toSummarize = $messages->slice(0, $splitIndex)->values(); $previousSummary = $existing->summary ?? ''; // Build SDK messages for summarization $sdkMessages = []; - foreach ($toSummarize as $msg) { + foreach ($plan->messagesToSummarize as $msg) { if (empty($msg->content)) { continue; } @@ -119,8 +107,21 @@ public function compact(string $channelId, User $agent): ?ConversationSummary } } - $summaryText = $this->summarize($sdkMessages, $previousSummary); - $tokensBefore = $this->estimateMessagesTokens($sdkMessages); + try { + $summaryText = $this->summarize($sdkMessages, $previousSummary, $plan); + } catch (\Throwable $e) { + $this->recordFailure($channelId, $agent, $existing, $e); + + Log::error('Conversation summarization failed', [ + 'channel_id' => $channelId, + 'agent' => $agent->name, + 'error' => $e->getMessage(), + ]); + + return null; + } + + $tokensBefore = $plan->tokensToSummarize; $summary = ConversationSummary::updateOrCreate( ['channel_id' => $channelId, 'agent_id' => $agent->id], @@ -130,11 +131,17 @@ public function compact(string $channelId, User $agent): ?ConversationSummary 'tokens_after' => $this->estimateTokenCount($summaryText), 'compaction_count' => ($existing->compaction_count ?? 0) + 1, 'flush_count' => 0, // Reset for new compaction cycle + 'compaction_failure_count' => 0, + 'last_compaction_failed_at' => null, + 'compaction_circuit_open_until' => null, + 'last_compaction_error' => null, 'messages_summarized' => ($existing->messages_summarized ?? 0) + count($sdkMessages), - 'last_message_id' => $toSummarize->last()->id ?? $existing->last_message_id, + 'last_message_id' => $plan->lastSummarizedMessageId() ?? 
$existing->last_message_id, ] ); + $this->extractDurableMemories($agent, $summaryText); + Log::info('Conversation compacted', [ 'channel_id' => $channelId, 'agent' => $agent->name, @@ -142,6 +149,8 @@ public function compact(string $channelId, User $agent): ?ConversationSummary 'tokens_before' => $tokensBefore, 'tokens_after' => $summary->tokens_after, 'compaction_count' => $summary->compaction_count, + 'split_index' => $plan->splitIndex, + 'tokens_kept' => $plan->tokensToKeep, ]); return $summary; @@ -152,14 +161,19 @@ public function compact(string $channelId, User $agent): ?ConversationSummary * * @param array $messages */ - private function summarize(array $messages, string $previousSummary): string + private function summarize(array $messages, string $previousSummary, CompactionPlan $plan): string { - $prompt = "You are summarizing a conversation for an AI agent's context window.\n\n"; + $prompt = "You are summarizing older OpenCompany conversation history for later retrieval.\n\n"; if ($previousSummary) { $prompt .= "Previous summary of even older messages:\n{$previousSummary}\n\n"; } + $prompt .= "Compaction plan:\n"; + $prompt .= "- Messages being summarized: {$plan->messagesToSummarize->count()}\n"; + $prompt .= "- Messages kept verbatim after the split: {$plan->messagesToKeep->count()}\n"; + $prompt .= "- Tokens kept verbatim: {$plan->tokensToKeep}\n\n"; + $prompt .= "Messages to summarize:\n"; foreach ($messages as $msg) { $role = $msg instanceof AssistantMessage ? 'assistant' : 'user'; @@ -167,10 +181,14 @@ private function summarize(array $messages, string $previousSummary): string $prompt .= "[{$role}]: {$content}\n"; } - $prompt .= "\nCreate a concise summary that captures:\n"; - $prompt .= "- Key topics discussed\n- Decisions made\n- Action items\n- Important context\n"; - $prompt .= "- User preferences expressed\n\n"; - $prompt .= "Be factual and specific. 
Preserve names, dates, and technical details."; + $prompt .= "\nReturn markdown with these exact headings:\n"; + $prompt .= "## Objectives\n## Decisions\n## Open Work\n## Durable Facts\n## References\n\n"; + $prompt .= "Rules:\n"; + $prompt .= "- Use short bullet lists under every heading.\n"; + $prompt .= "- Include names, dates, tool outputs, IDs, and file paths when they matter.\n"; + $prompt .= "- Put reusable preferences, standing decisions, and durable facts under Durable Facts.\n"; + $prompt .= "- If a section has nothing important, write a single bullet: - none\n"; + $prompt .= "- Do not invent anything.\n"; [$provider, $model] = AppSetting::resolveProviderModel( 'memory_summary_model', 'memory.compaction.summary_model' @@ -191,8 +209,117 @@ private function summarize(array $messages, string $previousSummary): string return $response->text; } catch (\Throwable $e) { - Log::error('Conversation summarization failed', ['error' => $e->getMessage()]); - return $previousSummary ?: '[Summary generation failed]'; + throw $e; + } + } + + /** + * @param \Illuminate\Support\Collection $messages + */ + private function buildPlan(\Illuminate\Support\Collection $messages): ?CompactionPlan + { + $keepRecentTokens = (int) config('memory.compaction.keep_recent_tokens', 20_000); + $minKeep = (int) config('memory.compaction.min_keep_messages', 3); + $keptTokens = 0; + $splitIndex = 0; + + for ($i = $messages->count() - 1; $i >= 0; $i--) { + $msgTokens = $this->estimateTokenCount((string) ($messages[$i]->content ?? 
'')); + + if ($keptTokens + $msgTokens > $keepRecentTokens + && ($messages->count() - $i - 1) >= $minKeep) { + $splitIndex = $i + 1; + break; + } + + $keptTokens += $msgTokens; + } + + if ($splitIndex <= 0) { + return null; + } + + $toSummarize = $messages->slice(0, $splitIndex)->values(); + $toKeep = $messages->slice($splitIndex)->values(); + + return new CompactionPlan( + messagesToSummarize: $toSummarize, + messagesToKeep: $toKeep, + splitIndex: $splitIndex, + tokensToSummarize: $this->estimateMessagesTokens($toSummarize), + tokensToKeep: $this->estimateMessagesTokens($toKeep), + ); + } + + private function isCircuitOpen(string $channelId, User $agent): bool + { + $summary = ConversationSummary::where('channel_id', $channelId) + ->where('agent_id', $agent->id) + ->first(); + + return $summary?->compaction_circuit_open_until?->isFuture() ?? false; + } + + private function recordFailure(string $channelId, User $agent, ?ConversationSummary $existing, \Throwable $error): void + { + $failureCount = ($existing?->compaction_failure_count ?? 0) + 1; + $tripAfter = (int) config('memory.compaction.circuit_breaker.after_failures', 3); + $cooldownMinutes = (int) config('memory.compaction.circuit_breaker.cooldown_minutes', 30); + + ConversationSummary::updateOrCreate( + ['channel_id' => $channelId, 'agent_id' => $agent->id], + [ + 'workspace_id' => $agent->workspace_id ?? workspace()->id, + 'summary' => $existing->summary ?? '', + 'tokens_before' => $existing->tokens_before ?? 0, + 'tokens_after' => $existing->tokens_after ?? 0, + 'compaction_count' => $existing->compaction_count ?? 0, + 'flush_count' => $existing->flush_count ?? 0, + 'messages_summarized' => $existing->messages_summarized ?? 0, + 'last_message_id' => $existing?->last_message_id, + 'compaction_failure_count' => $failureCount, + 'last_compaction_failed_at' => now(), + 'compaction_circuit_open_until' => $failureCount >= $tripAfter + ? 
now()->addMinutes($cooldownMinutes) + : null, + 'last_compaction_error' => Str::limit($error->getMessage(), 4_000), + ] + ); + } + + private function extractDurableMemories(User $agent, string $summary): void + { + if (! config('memory.compaction.memory_extraction.enabled', true)) { + return; + } + + $items = array_slice( + $this->memoryExtractor->extract($summary), + 0, + (int) config('memory.compaction.memory_extraction.max_items', 8), + ); + + if ($items === []) { + return; + } + + $entry = "### [compaction] " . now()->format('H:i') . "\n\n"; + $entry .= implode("\n", array_map( + fn (string $item): string => "- {$item}", + $items, + )); + + try { + $document = $this->documentService->createMemoryLog($agent, $entry); + + if ($document !== null) { + $this->documentIndexingService->index($document, 'memory', $agent->id); + } + } catch (\Throwable $e) { + Log::warning('Failed to persist extracted compaction memories', [ + 'agent' => $agent->name, + 'error' => $e->getMessage(), + ]); } } diff --git a/app/Services/Memory/MemoryFlushService.php b/app/Services/Memory/MemoryFlushService.php index 5aff2a0..4b6d8a5 100644 --- a/app/Services/Memory/MemoryFlushService.php +++ b/app/Services/Memory/MemoryFlushService.php @@ -3,17 +3,15 @@ namespace App\Services\Memory; use App\Agents\OpenCompanyAgent; -use App\Agents\Providers\DynamicProviderResolver; use App\Models\ConversationSummary; use App\Models\User; use Illuminate\Support\Facades\Log; +use OpenCompany\PrismRelay\Bridge\SystemPromptBag; class MemoryFlushService { public function __construct( - private ConversationCompactionService $compactionService, - private ModelContextRegistry $contextRegistry, - private DynamicProviderResolver $providerResolver, + private ContextBudget $contextBudget, ) {} /** @@ -41,40 +39,13 @@ public function shouldFlush(string $channelId, User $agent, iterable $messages, return false; } - // Resolve model context window try { - $resolved = $this->providerResolver->resolve($agent); - 
$contextWindow = $this->contextRegistry->getContextWindow($resolved['model']); + $budget = $this->contextBudget->snapshotForAgent($agent, $messages, $systemPrompt); } catch (\Throwable) { return false; } - // Calculate available context (same logic as compaction) - $systemTokens = $systemPrompt - ? $this->compactionService->estimateTokenCount($systemPrompt) - : config('memory.compaction.system_prompt_fallback_reserve', 10_000); - $outputReserve = config('memory.compaction.output_reserve', 4_096); - $available = $contextWindow - $systemTokens - $outputReserve; - - if ($available <= 0) { - return false; - } - - // Estimate message tokens with safety margin - $messageTokens = 0; - foreach ($messages as $msg) { - $content = $msg->content ?? ''; - $messageTokens += $this->compactionService->estimateTokenCount($content); - } - - $safetyMargin = config('memory.compaction.safety_margin', 1.2); - $adjustedTokens = (int) ($messageTokens * $safetyMargin); - $compactionThreshold = (int) ($available * config('memory.compaction.threshold_ratio', 0.75)); - $softThresholdTokens = config('memory.memory_flush.soft_threshold_tokens', 4000); - $softZoneStart = $compactionThreshold - $softThresholdTokens; - - // Flush when context is within the soft zone (approaching compaction but not yet exceeding it) - return $adjustedTokens > $softZoneStart && $adjustedTokens <= $compactionThreshold; + return (bool) $budget['is_above_flush'] && ! 
(bool) $budget['is_above_compaction']; } /** @@ -87,6 +58,9 @@ public function shouldFlush(string $channelId, User $agent, iterable $messages, public function flush(string $channelId, User $agent): void { $agentInstance = OpenCompanyAgent::for($agent, $channelId); + app()->instance(SystemPromptBag::class, new SystemPromptBag( + $agentInstance->systemPrompts() + )); $agentInstance->prompt($this->buildFlushPrompt()); // Increment flush count (create summary record if needed) diff --git a/app/Services/Memory/ModelContextRegistry.php b/app/Services/Memory/ModelContextRegistry.php index 987a4a6..8a540c9 100644 --- a/app/Services/Memory/ModelContextRegistry.php +++ b/app/Services/Memory/ModelContextRegistry.php @@ -4,6 +4,7 @@ use App\Models\AppSetting; use Illuminate\Support\Facades\Log; +use OpenCompany\PrismRelay\Meta\ProviderMeta; class ModelContextRegistry { @@ -12,31 +13,34 @@ class ModelContextRegistry */ private const LEVENSHTEIN_MAX_DISTANCE = 5; + public function __construct( + private ProviderMeta $providerMeta, + ) {} + /** * Get the context window size (in tokens) for a given model. * * Lookup order: * 1. User overrides from AppSetting (admin-configurable) - * 2. Built-in registry: exact match, then longest prefix match - * 3. Levenshtein fuzzy match (closest known model within distance threshold) - * 4. Default (conservative 32K) + * 2. prism-relay provider metadata when a provider is known + * 3. Local fallback registry: exact match, then longest prefix match + * 4. Levenshtein fuzzy match (closest known model within distance threshold) + * 5. Default (conservative 32K) */ - public function getContextWindow(string $model): int + public function getContextWindow(string $model, ?string $provider = null): int { $overrides = $this->getUserOverrides(); - $builtIn = config('memory.context_windows.models', []); + $exactOverride = $this->exactOverride($overrides, $model, $provider); - // 1. 
User overrides — exact match takes highest priority - if (isset($overrides[$model])) { - return (int) $overrides[$model]; + if ($exactOverride !== null) { + return $exactOverride; } - // 2a. Built-in exact match - if (isset($builtIn[$model])) { - return $builtIn[$model]; + if ($provider !== null && $this->providerMeta->has($provider)) { + return $this->providerMeta->contextWindow($provider, $model); } - // 2b. Longest prefix match across both built-in and user overrides + $builtIn = config('memory.context_windows.models', []); $allModels = array_merge($builtIn, $overrides); $prefixResult = $this->longestPrefixMatch($model, $allModels); if ($prefixResult !== null) { @@ -126,4 +130,20 @@ private function getUserOverrides(): array return is_array($value) ? $value : []; } + + /** + * @param array $overrides + */ + private function exactOverride(array $overrides, string $model, ?string $provider): ?int + { + if ($provider !== null && isset($overrides["{$provider}:{$model}"])) { + return (int) $overrides["{$provider}:{$model}"]; + } + + if (isset($overrides[$model])) { + return (int) $overrides[$model]; + } + + return null; + } } diff --git a/app/Services/Memory/OutputTruncator.php b/app/Services/Memory/OutputTruncator.php new file mode 100644 index 0000000..3743315 --- /dev/null +++ b/app/Services/Memory/OutputTruncator.php @@ -0,0 +1,60 @@ +maxLines ??= (int) config('memory.tool_results.max_lines', 2000); + $this->maxBytes ??= (int) config('memory.tool_results.max_bytes', 50_000); + $this->disk ??= (string) config('memory.tool_results.disk', 'local'); + $this->pathPrefix ??= trim((string) config('memory.tool_results.path', 'agent-tool-results'), '/'); + } + + public function truncate(mixed $result, string $toolCallId): mixed + { + if (! 
is_string($result)) { + return $result; + } + + $lines = substr_count($result, "\n") + 1; + $bytes = strlen($result); + + if ($lines <= $this->maxLines && $bytes <= $this->maxBytes) { + return $result; + } + + $storagePath = $this->storeFullOutput($result, $toolCallId); + $truncated = $result; + + if ($lines > $this->maxLines) { + $truncated = implode("\n", array_slice(explode("\n", $truncated), 0, $this->maxLines)); + } + + if (strlen($truncated) > $this->maxBytes) { + $truncated = mb_strcut($truncated, 0, $this->maxBytes, 'UTF-8'); + } + + return $truncated."\n\n[truncated - full output stored at storage:{$storagePath}]"; + } + + private function storeFullOutput(string $result, string $toolCallId): string + { + $datePath = now()->format('Y/m/d'); + $safeId = trim(preg_replace('/[^a-zA-Z0-9_-]/', '_', $toolCallId) ?? '', '_'); + $safeId = $safeId !== '' ? $safeId : Str::random(12); + $path = "{$this->pathPrefix}/{$datePath}/tool_{$safeId}.txt"; + + Storage::disk($this->disk)->put($path, $result); + + return $path; + } +} diff --git a/app/Services/Memory/PromptFrameBuilder.php b/app/Services/Memory/PromptFrameBuilder.php new file mode 100644 index 0000000..9a5536c --- /dev/null +++ b/app/Services/Memory/PromptFrameBuilder.php @@ -0,0 +1,78 @@ + $sections + * @param string[]|null $volatileLabels + * @return array{ + * stable_prompt: string, + * volatile_prompt: string, + * full_prompt: string, + * stable_sections: array, + * volatile_sections: array, + * stable_breakdown: array, + * volatile_breakdown: array, + * full_breakdown: array + * } + */ + public function splitSections(array $sections, ?array $volatileLabels = null): array + { + $volatileLabels ??= self::DEFAULT_VOLATILE_SECTION_LABELS; + $stable = []; + $volatile = []; + + foreach ($sections as $section) { + if (in_array($section['label'], $volatileLabels, true)) { + $volatile[] = $section; + } else { + $stable[] = $section; + } + } + + return [ + 'stable_prompt' => $this->join($stable), + 'volatile_prompt' 
=> $this->join($volatile), + 'full_prompt' => $this->join($sections), + 'stable_sections' => $stable, + 'volatile_sections' => $volatile, + 'stable_breakdown' => $this->breakdown($stable), + 'volatile_breakdown' => $this->breakdown($volatile), + 'full_breakdown' => $this->breakdown($sections), + ]; + } + + /** + * @param array $sections + */ + private function join(array $sections): string + { + return implode('', array_column($sections, 'content')); + } + + /** + * @param array $sections + * @return array + */ + private function breakdown(array $sections): array + { + return array_values(array_map( + fn (array $section) => [ + 'label' => $section['label'], + 'chars' => mb_strlen($section['content']), + ], + $sections, + )); + } +} diff --git a/app/Services/Memory/ToolResultDeduplicator.php b/app/Services/Memory/ToolResultDeduplicator.php new file mode 100644 index 0000000..5c00ac0 --- /dev/null +++ b/app/Services/Memory/ToolResultDeduplicator.php @@ -0,0 +1,124 @@ + $messages + * @return array{messages: array, deduplicated: int} + */ + public function deduplicate(array $messages): array + { + $count = count($messages); + if ($count < 2) { + return ['messages' => $messages, 'deduplicated' => 0]; + } + + $latestBySig = []; + for ($i = $count - 1; $i >= 0; $i--) { + if (! $messages[$i] instanceof ToolResultMessage) { + continue; + } + + $results = $messages[$i]->toolResults->values(); + for ($rIdx = $results->count() - 1; $rIdx >= 0; $rIdx--) { + /** @var ToolResult $result */ + $result = $results[$rIdx]; + + if ($this->isSuperseded($result->result)) { + continue; + } + + if (! $this->shouldDeduplicate($result)) { + continue; + } + + $sig = $this->signature($result); + if (! isset($latestBySig[$sig])) { + $latestBySig[$sig] = [$i, $rIdx]; + } + } + } + + $deduplicated = 0; + + for ($i = 0; $i < $count; $i++) { + if (! 
$messages[$i] instanceof ToolResultMessage) { + continue; + } + + $toolResults = $messages[$i]->toolResults->values(); + + foreach ($toolResults as $rIdx => $result) { + if (! $result instanceof ToolResult) { + continue; + } + + if ($this->isSuperseded($result->result)) { + continue; + } + + if (! $this->shouldDeduplicate($result)) { + continue; + } + + $sig = $this->signature($result); + if (isset($latestBySig[$sig]) && $latestBySig[$sig] !== [$i, $rIdx]) { + $toolResults[$rIdx] = $this->supersede($result, self::EXACT_SUPERSEDE); + $deduplicated++; + } + } + + $messages[$i]->toolResults = $toolResults; + } + + return ['messages' => $messages, 'deduplicated' => $deduplicated]; + } + + private function supersede(ToolResult $result, string $placeholder): ToolResult + { + return new ToolResult( + id: $result->id, + name: $result->name, + arguments: $result->arguments, + result: $placeholder, + resultId: $result->resultId, + ); + } + + private function signature(ToolResult $result): string + { + $args = $result->arguments; + ksort($args); + $resultString = is_string($result->result) + ? $result->result + : json_encode($result->result, JSON_INVALID_UTF8_SUBSTITUTE); + + return $result->name.':'.json_encode($args, JSON_THROW_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE).':'.md5((string) $resultString); + } + + private function isSuperseded(mixed $result): bool + { + if (! 
is_string($result)) { + return false; + } + + return str_starts_with($result, '[Superseded'); + } + + private function shouldDeduplicate(ToolResult $result): bool + { + return $this->toolRegistry->getToolTypeBySlug($result->name) === 'read'; + } +} diff --git a/config/memory.php b/config/memory.php index 09fe1c5..87a197b 100644 --- a/config/memory.php +++ b/config/memory.php @@ -137,6 +137,22 @@ ], ], + /* + |-------------------------------------------------------------------------- + | Context Budget + |-------------------------------------------------------------------------- + | + | Shared token-budget thresholds used by flush, compaction, and retry + | protection. Context windows come from prism-relay when the provider is + | known; the local model map above remains the fallback registry. + | + */ + + 'budget' => [ + 'warning_ratio' => (float) env('MEMORY_WARNING_RATIO', 0.65), + 'blocking_margin_tokens' => (int) env('MEMORY_BLOCKING_MARGIN_TOKENS', 1_024), + ], + /* |-------------------------------------------------------------------------- | Memory Scope @@ -171,6 +187,14 @@ 'system_prompt_fallback_reserve' => 10_000, 'summary_model' => env('MEMORY_SUMMARY_MODEL', 'anthropic:claude-sonnet-4-5-20250929'), 'summary_max_tokens' => 2_000, + 'circuit_breaker' => [ + 'after_failures' => (int) env('MEMORY_COMPACTION_CIRCUIT_AFTER_FAILURES', 3), + 'cooldown_minutes' => (int) env('MEMORY_COMPACTION_CIRCUIT_COOLDOWN_MINUTES', 30), + ], + 'memory_extraction' => [ + 'enabled' => env('MEMORY_COMPACTION_EXTRACT_TO_LOG', true), + 'max_items' => (int) env('MEMORY_COMPACTION_EXTRACT_MAX_ITEMS', 8), + ], ], /* @@ -204,4 +228,40 @@ 'max_flushes_per_cycle' => 1, ], + /* + |-------------------------------------------------------------------------- + | Retry Context Pruning + |-------------------------------------------------------------------------- + | + | Checkpoint resume can append many historical read tool results to the + | retry prompt. 
Pruning clears only older OpenCompany read-tool payloads + | while leaving recent results and write-side effects intact. + | + */ + + 'pruning' => [ + 'enabled' => env('MEMORY_PRUNING_ENABLED', true), + 'keep_recent_read_results' => (int) env('MEMORY_PRUNING_KEEP_RECENT_READ_RESULTS', 2), + 'min_result_tokens' => (int) env('MEMORY_PRUNING_MIN_RESULT_TOKENS', 400), + 'min_total_saved_tokens' => (int) env('MEMORY_PRUNING_MIN_TOTAL_SAVED_TOKENS', 1_000), + ], + + /* + |-------------------------------------------------------------------------- + | Tool Result Checkpoints + |-------------------------------------------------------------------------- + | + | Retry checkpoint tool results can get large enough to bloat the prompt + | context and task-step payloads. Large string results are truncated for the + | checkpoint while the full payload is persisted on durable storage. + | + */ + + 'tool_results' => [ + 'max_lines' => (int) env('MEMORY_TOOL_RESULT_MAX_LINES', 2_000), + 'max_bytes' => (int) env('MEMORY_TOOL_RESULT_MAX_BYTES', 50_000), + 'disk' => env('MEMORY_TOOL_RESULT_DISK', 'local'), + 'path' => env('MEMORY_TOOL_RESULT_PATH', 'agent-tool-results'), + ], + ]; diff --git a/database/migrations/2026_04_09_120000_add_compaction_failure_tracking_to_conversation_summaries_table.php b/database/migrations/2026_04_09_120000_add_compaction_failure_tracking_to_conversation_summaries_table.php new file mode 100644 index 0000000..cc24480 --- /dev/null +++ b/database/migrations/2026_04_09_120000_add_compaction_failure_tracking_to_conversation_summaries_table.php @@ -0,0 +1,30 @@ +integer('compaction_failure_count')->default(0)->after('flush_count'); + $table->timestamp('last_compaction_failed_at')->nullable()->after('compaction_failure_count'); + $table->timestamp('compaction_circuit_open_until')->nullable()->after('last_compaction_failed_at'); + $table->text('last_compaction_error')->nullable()->after('compaction_circuit_open_until'); + }); + } + + public function down(): void 
+ { + Schema::table('conversation_summaries', function (Blueprint $table) { + $table->dropColumn([ + 'compaction_failure_count', + 'last_compaction_failed_at', + 'compaction_circuit_open_until', + 'last_compaction_error', + ]); + }); + } +}; diff --git a/docs/INDEX.md b/docs/INDEX.md index 99a1d3d..e4f0d1d 100644 --- a/docs/INDEX.md +++ b/docs/INDEX.md @@ -15,16 +15,17 @@ | [observability.md](architecture/observability.md) | Monitoring, metrics, logging, error tracking, health checks, alerting | Building admin/ops features | | [ai-tool-packages.md](architecture/ai-tool-packages.md) | AI tool package ecosystem — ToolProvider contract, credential abstraction, hybrid ToolRegistry, building new tool packages | Creating or modifying AI tool packages, understanding the plugin architecture | | [interagent-comms.md](architecture/interagent-comms.md) | Inter-agent communication protocol — ContactAgent tool with ask/delegate/notify patterns, DM channels, delegation tracking | Building or debugging agent-to-agent communication | +| [kosmokrator-reuse-audit.md](architecture/kosmokrator-reuse-audit.md) | Full audit of what OpenCompany should reuse, adapt, or skip from KosmoKrator | Planning cross-repo reuse, agent runtime work, metadata consolidation | +| [runtime-alignment-implementation-audit.md](architecture/runtime-alignment-implementation-audit.md) | Post-implementation audit — findings now tracked as Plane issues (OC-1 through OC-6) | Reviewing audit results and fix status | | [ai-tool-strategy.md](strategy/ai-tool-strategy.md) | AI tool ecosystem strategy — package publishing, MCP export, missing tool analysis, Fair Code growth | Planning tool ecosystem, evaluating new tool integrations | ## Planning & Implementation | Document | What it covers | Read when... 
| |----------|---------------|--------------| -| [implementation-todo.md](planning/implementation-todo.md) | Complete task breakdown across 8+ phases with dependencies, priority order, and file manifests | Starting implementation work, tracking progress | -| [memory-implementation.md](planning/memory-implementation.md) | Memory system design — 6 phases: pgvector, chunking, embedding, hybrid search, compaction, flush **(Status: Complete)** | Understanding the memory architecture | +| [memory-implementation.md](planning/memory-implementation.md) | Memory system architecture reference — STM/LTM model, phase summary **(Status: Complete)** | Understanding the memory architecture | +| [kosmokrator-runtime-alignment-checklist.md](planning/kosmokrator-runtime-alignment-checklist.md) | Checklist for aligning with KosmoKrator/prism-relay — completed work + pointers to open Plane issues | Reviewing runtime-alignment status | | [external-channel-sync.md](external-channel-sync.md) | Bidirectional sync design for Telegram/Discord — message tracking, edit/pin/react sync, channel discovery **(Telegram: Done, Discord: Not started)** | Working on external platform integration | -| [todo.md](todo.md) | Feature TODO list — Docs (starring, search, publish controls), Agent system (budget approval) | Quick check of remaining feature work | | [discord.md](discord.md) | Discord integration documentation — architecture, sidecar, configuration | Setting up or debugging Discord integration | | [codex-subscription-auth.md](planning/codex-subscription-auth.md) | Codex subscription authentication planning | Working on Codex integration | @@ -49,7 +50,6 @@ | Document | What it covers | Read when... 
| |----------|---------------|--------------| -| [feature-test-map.md](testing/feature-test-map.md) | Checklist of every feature, button, and interaction to test (~500 items) | Manual QA testing | | [qa-strategy.md](testing/qa-strategy.md) | Testing pyramid, CI/CD pipeline, coverage targets, test data management | Setting up automated test infrastructure | ## Tools & Features diff --git a/docs/architecture/kosmokrator-reuse-audit.md b/docs/architecture/kosmokrator-reuse-audit.md new file mode 100644 index 0000000..aa8eaed --- /dev/null +++ b/docs/architecture/kosmokrator-reuse-audit.md @@ -0,0 +1,953 @@ +# KosmoKrator Reuse Audit + +> Full list of what OpenCompany should reuse, adapt, or avoid from the `kosmokrator` repo. +> Scope: compare `/Users/rutger/Sites/opencompany` against `/Users/rutger/Sites/kosmokrator` and identify practical reuse opportunities. + +--- + +## Executive Summary + +OpenCompany and KosmoKrator already share the correct low-level foundation: + +- `prism-php/prism` +- `opencompanyapp/prism-relay` +- `opencompany/prism-codex` +- `opencompanyapp/integration-core` + +That means the main reuse opportunity is **agent runtime infrastructure**, not UI or shell code. + +The strongest reusable areas from KosmoKrator are: + +1. Provider and model cataloging +2. Context management and prompt budgeting +3. Tool result deduplication and output truncation +4. Typed settings schema +5. Skill loading and project-local instruction patterns +6. 
Subagent orchestration concepts + +The weakest reuse areas are: + +- Symfony TUI and ANSI renderer code +- CLI-specific shell and filesystem tools +- Local desktop install and self-update flows +- Terminal-specific permission UX + +--- + +## Repo Shape + +### OpenCompany + +- Product type: Laravel multi-tenant web app +- Main concerns: workspaces, channels, tasks, documents, approvals, integrations, agent collaboration +- Shared agent tool files: `158` files under `app/Agents/Tools` +- Built-in provider classes: `15` +- Test files: `79` + +Key files: + +- `composer.json` +- `app/Agents/OpenCompanyAgent.php` +- `app/Agents/Tools/ToolRegistry.php` +- `app/Services/Memory/ConversationCompactionService.php` +- `app/Services/Memory/ModelContextRegistry.php` +- `app/Agents/Tools/Agents/ContactAgent.php` + +### KosmoKrator + +- Product type: local CLI coding agent +- Main concerns: code editing, terminal UX, local permissions, session persistence, tool execution, subagent swarms +- Tool files: `44` files under `src/Tool` +- Total PHP source files: much larger runtime core than OpenCompany's agent layer +- Test files: `196` + +Key files: + +- `composer.json` +- `src/Agent/ContextManager.php` +- `src/Agent/SubagentOrchestrator.php` +- `src/Settings/SettingsSchema.php` +- `src/LLM/ProviderCatalog.php` +- `src/LLM/PromptFrameBuilder.php` +- `src/Agent/ToolResultDeduplicator.php` +- `src/Agent/OutputTruncator.php` +- `src/Skill/SkillLoader.php` + +--- + +## Shared Foundation Already In Place + +These are already shared and should remain the main cross-repo seam. + +### 1. 
Prism / relay / integration core + +Both repos depend on: + +- `prism-php/prism` +- `opencompanyapp/prism-relay` +- `opencompany/prism-codex` +- `opencompanyapp/integration-core` + +Why this matters: + +- LLM provider support should converge here, not inside either app +- Tool contracts should converge here, not inside Laravel-only or Symfony-only abstractions +- Model metadata should become shared here + +Current issue: + +- OpenCompany still keeps provider metadata in `config/integrations.php` +- KosmoKrator keeps richer model/provider metadata in `config/models.yaml`, `config/prism.yaml`, and `src/LLM/ProviderCatalog.php` + +Recommendation: + +- Move provider and model metadata ownership into `prism-relay` +- Make both repos consume the same metadata source + +Priority: `P0` + +--- + +## Reuse Directly Or With Minimal Extraction + +These are the best candidates to port first. + +### 2. Prompt frame splitting for cacheable system prompts + +Source: + +- `src/LLM/PromptFrameBuilder.php` + +Why reuse: + +- OpenCompany already builds prompt sections in `app/Agents/OpenCompanyAgent.php` +- KosmoKrator already splits stable prompt prefix from volatile task content +- This should reduce token waste and improve prompt cache hit rates + +OpenCompany fit: + +- Add a web-safe version of `PromptFrameBuilder` +- Split static identity/instructions from volatile sections such as current task, channel context, and recent runtime state + +Priority: `P0` + +Reuse level: `Direct logic port` + +--- + +### 3. 
Tool result deduplication + +Source: + +- `src/Agent/ToolResultDeduplicator.php` + +What it does: + +- Replaces stale or repeated tool outputs with short placeholders +- Handles exact duplicates +- Handles stale `file_read` results after edits +- Handles `grep` results later superseded by `file_read` + +Why reuse: + +- OpenCompany agent context can accumulate repeated tool output +- This is pure context hygiene with low product risk + +OpenCompany fit: + +- Adapt the deduper for Laravel AI message/value object types +- Run it before building the final message set sent to the model +- Apply it to OpenCompany's document, task, file, and search style tools where outputs repeat + +Priority: `P0` + +Reuse level: `Adapted port` + +--- + +### 4. Output truncation with persisted full result + +Source: + +- `src/Agent/OutputTruncator.php` + +What it does: + +- Caps large tool output by line and byte count +- Saves the full result to disk +- Keeps only a concise truncated version in the model context + +Why reuse: + +- OpenCompany has many tools that can return oversized payloads +- This prevents context bloat from tables, documents, search results, raw external API payloads, and generated content + +OpenCompany fit: + +- Replace disk persistence with DB or object-storage backed persistence +- Keep the same policy: short preview in prompt, full result stored elsewhere +- Expose a retrieval path for the agent if it needs to inspect the full output later + +Priority: `P0` + +Reuse level: `Adapt architecture, not storage implementation` + +--- + +### 5. 
Provider catalog and richer provider selection layer + +Source: + +- `src/LLM/ProviderCatalog.php` +- `config/models.yaml` +- `config/prism.yaml` + +What it does well: + +- Provider labels, descriptions, auth modes, ordering +- Provider and model option generation +- Free-text model support for selected providers +- Pulls metadata from relay registry instead of hardcoding everything locally + +Why reuse: + +- OpenCompany's provider config is flatter and more static +- Model metadata is split across config and memory-specific lookup logic +- KosmoKrator has a better abstraction for presenting providers and model capabilities + +OpenCompany fit: + +- Use the same provider catalog pattern for the admin/provider settings UI +- Use shared metadata for model capabilities, pricing, context windows, and default models +- Eliminate duplicate "source of truth" between provider setup and memory budgeting + +Priority: `P0` + +Reuse level: `Extract to shared package or port pattern` + +--- + +### 6. Settings schema pattern + +Source: + +- `src/Settings/SettingsSchema.php` + +What it does: + +- Central typed registry of settings +- Aliases, categories, labels, defaults, effect timing +- Clear separation between storage and schema + +Why reuse: + +- OpenCompany has many agent/runtime settings but no equally explicit typed schema layer +- A schema-driven settings system would simplify validation, admin UI generation, defaults, and API exposure + +OpenCompany fit: + +- Build an `AgentSettingsSchema` or `RuntimeSettingsSchema` +- Use it for workspace defaults, per-agent overrides, and feature flags +- Drive admin forms and validation from schema metadata + +Priority: `P1` + +Reuse level: `Pattern reuse with Laravel implementation` + +--- + +## Pruning, Compaction, And Prompt Caching + +This is the main runtime gap between OpenCompany and KosmoKrator. 
+ +OpenCompany currently has: + +- summary-based conversation compaction in `app/Services/Memory/ConversationCompactionService.php` +- soft-zone memory flushing in `app/Services/Memory/MemoryFlushService.php` +- a local `ModelContextRegistry` for context-window lookup +- basic prompt splitting, checkpoint truncation, and read-tool deduplication + +KosmoKrator adds a fuller context pipeline: + +- `src/Agent/ContextManager.php` coordinates warning, pruning, compaction, and fallback behaviour +- `src/Agent/ContextPruner.php` does cheap micro-pruning before full compaction +- `src/Agent/ContextCompactor.php` builds a structured compaction plan and extracts durable memories +- `src/Agent/ContextBudget.php` centralises warning, auto-compact, and blocking thresholds +- `src/LLM/PromptFrameBuilder.php` is wired into `PrismService` + +`prism-relay` already provides the cache-planning layer: + +- `src/Relay.php` +- `src/Caching/PromptCachePlanner.php` +- `src/Caching/PromptCacheOrchestrator.php` +- `src/Meta/ProviderMeta.php` + +### What OpenCompany should change now + +#### 7. Replace `ModelContextRegistry` with relay-backed metadata + +Current problem: + +- OpenCompany keeps a separate context-window registry in `app/Services/Memory/ModelContextRegistry.php` +- `prism-relay` already knows model context windows via `ProviderMeta::contextWindow()` + +Recommendation: + +- Make `ModelContextRegistry` a thin adapter over `OpenCompany\PrismRelay\Meta\ProviderMeta` +- Keep `AppSetting` overrides on top +- Remove most of the duplicated built-in model registry over time + +Why: + +- One source of truth for context windows +- Better alignment between provider selection, budgeting, and pricing/cache capability + +Priority: `P0` + +#### 8. 
Introduce a real context budget service + +Current problem: + +- OpenCompany repeats threshold math across `ConversationCompactionService`, `MemoryFlushService`, and `AgentRespondJob` +- The thresholds are estimated ad hoc instead of coming from one snapshot object + +Recommendation: + +- Add an OpenCompany `ContextBudget` service modeled after KosmoKrator's `src/Agent/ContextBudget.php` +- Use it for: + - warning threshold + - flush threshold + - compaction threshold + - hard blocking threshold + - observability snapshots + +Why: + +- Consistent trigger behavior +- Easier tuning per model/provider +- Cleaner logs and debugging + +Priority: `P0` + +#### 9. Add micro-pruning before compaction + +Current problem: + +- OpenCompany jumps from "normal history" to full summarization +- It now truncates checkpointed tool results and deduplicates identical read results, but it still lacks a cheap middle step + +Recommendation: + +- Add an OpenCompany-specific `ContextPruner` +- Scope it to old, large, read-heavy tool results: + - file reads + - search results + - thread/message reads + - document fetches + - table/list reads +- Protect recent turns and recent tool outputs +- Only accept a prune pass if the savings cross a minimum threshold + +Do not port directly: + +- KosmoKrator's `grep`, `glob`, `shell_read`, `bash` assumptions + +Why: + +- Reduces compaction frequency +- Saves tokens without paying an LLM summarization cost +- Fits OpenCompany's many structured read tools well + +Priority: `P0` + +#### 10. 
Make compaction more structured and failure-aware + +Current problem: + +- OpenCompany compaction is functional but simple: summarize older messages, store summary, continue +- It has no circuit breaker, no hard fallback path, and no structured compaction plan object + +Recommendation: + +- Keep the current `ConversationSummary` persistence model +- Add KosmoKrator-style concepts: + - explicit compaction plan object + - protected context + - compaction failure counter / circuit breaker + - hard fallback when compaction repeatedly fails + - summary-to-memory extraction pass + +Why: + +- More resilient under long-running channels +- Better preservation of durable facts +- Less risk of repeated compaction thrashing + +Priority: `P1` + +#### 11. Wire prompt caching through `prism-relay`, not just prompt splitting + +**Status: DONE** — Resolved via `CachingPrismGateway` in `prism-relay/src/Bridge/`. + +OpenCompany now uses `CachingPrismGateway` (extends `PrismGateway`) for all AI SDK drivers. Before each `prompt()` call, a `SystemPromptBag` with split `[stable, volatile]` prompts is bound in the container. The gateway reads the bag and calls `Relay::planPromptCache()` to annotate system prompts and messages with provider-specific cache control (Anthropic ephemeral, Gemini dedicated, OpenAI auto, OpenRouter ephemeral). No `laravel/ai` vendor patches required. + +Priority: `P0` — ~~resolved~~ + +### What OpenCompany should not copy directly + +#### 12. Do not copy KosmoKrator's pruning rules literally + +Skip direct ports of: + +- `grep` +- `glob` +- `bash` +- `shell_read` +- filesystem-specific stale-read heuristics + +Reason: + +- OpenCompany is not a local coding shell +- Its equivalent high-volume context comes from workspace tools, not Unix tools + +Priority: `P0` + +### Recommended implementation order for this area + +1. Replace context-window lookup with relay-backed metadata. +2. Introduce a shared `ContextBudget` service and move all threshold math there. +3. 
Add OpenCompany-specific micro-pruning for old read-tool outputs. +4. Improve compaction with a plan object, failure handling, and memory extraction. +5. Wire real provider prompt caching through `prism-relay` at the Laravel AI / Prism gateway layer. + +### Summary judgment + +For OpenCompany: + +- `prism-relay` should become the source of truth for model context windows and prompt-cache planning +- KosmoKrator should inform the context-budget, pruning, and compaction architecture +- The exact pruning heuristics must be rewritten around OpenCompany's read tools and multi-channel/task workflow + +--- + +## Reuse With Meaningful Adaptation + +These are strong ideas, but they need web-native implementations. + +### 7. Full context management pipeline + +Source: + +- `src/Agent/ContextManager.php` +- plus related classes such as `ContextBudget`, `ContextPruner`, `ProtectedContextBuilder`, `MemoryInjector` + +What it does well: + +- Pre-flight context pressure checks +- Micro-pruning before full compaction +- Compaction circuit breaker after repeated failures +- Protected runtime context +- Memory extraction from summaries +- Session-aware context shaping + +Why reuse: + +- OpenCompany's current compaction in `app/Services/Memory/ConversationCompactionService.php` is materially simpler +- KosmoKrator's pipeline is more resilient under pressure + +What to port: + +- Budget snapshots and preflight checks +- Compaction failure circuit breaker +- Protected context concept +- Post-compaction memory extraction +- Distinction between lightweight pruning and expensive compaction + +What not to port as-is: + +- TUI display calls +- local session memory assumptions +- CLI-specific project and directory context + +Priority: `P1` + +Reuse level: `Concept and core logic` + +--- + +### 8. 
Protected context builder + +Source: + +- `src/Agent/ProtectedContextBuilder.php` + +What it does: + +- Injects runtime facts the model should always see and should not override + +Why reuse: + +- OpenCompany already assembles many system prompt sections, but not all runtime facts are clearly treated as protected +- This helps separate stable policy from mutable user/task context + +OpenCompany fit: + +- Protected facts could include: + - workspace ID and name + - channel ID and type + - acting agent ID and role + - approval mode + - current task ID + - current user visibility scope + +Priority: `P1` + +Reuse level: `Pattern reuse` + +--- + +### 9. Session persistence ideas + +Source: + +- `src/Session/SessionManager.php` + +What it does well: + +- Central session facade +- Message persistence +- auto-title +- history reconstruction +- deduplication on load +- settings and memory scope coordination + +Why reuse: + +- OpenCompany already has first-class message/task/channel persistence, so it does not need the same storage model +- But it can reuse the patterns around resume, checkpointing, and reconstructed history hygiene + +What to reuse: + +- History reconstruction pass +- resume semantics +- checkpoint-aware continuation +- session-level metadata around compaction and recall + +What not to reuse: + +- project-path based scoping +- local SQLite storage assumptions + +Priority: `P2` + +Reuse level: `Patterns only` + +--- + +### 10. 
Skill system + +Source: + +- `src/Skill/SkillLoader.php` + +What it does well: + +- Loads skills from multiple scopes +- Clear precedence rules +- Lightweight frontmatter-based format + +Why reuse: + +- OpenCompany currently has no real code-level skill system +- Workspace-local or agent-local skills could become a powerful product feature + +Potential OpenCompany adaptation: + +- Workspace skills +- Agent role packs +- Team playbooks +- Department-specific instructions +- Shared procedural knowledge in a structured format + +Suggested storage options: + +- database-backed skill records +- document-backed skills with frontmatter +- repo/project attached skills for external workspaces + +Priority: `P2` + +Reuse level: `Strong feature pattern, not direct file loader copy` + +--- + +## Reuse The Concept, Not The Implementation + +These should influence OpenCompany design, but should not be copied directly. + +### 11. Subagent swarm orchestration + +Source: + +- `src/Agent/SubagentOrchestrator.php` + +What it does well: + +- dependency graphs +- concurrency limits +- sequential groups +- retries +- watchdog cancellation +- background result collection + +Current OpenCompany state: + +- OpenCompany uses agent-to-agent delegation via tasks and channels in `app/Agents/Tools/Agents/ContactAgent.php` +- This is valid for a multi-actor web platform, but less sophisticated as an orchestration runtime + +What to reuse: + +- dependency-aware delegation model +- grouped and sequenced sub-work +- per-agent concurrency limits +- watchdogs and stale-work detection +- richer run-state tracking + +What not to reuse: + +- Amp/Revolt future runtime +- in-process child-agent spawning model +- terminal lifecycle assumptions + +OpenCompany-native implementation should use: + +- Laravel queues +- tasks and task_steps +- events and broadcasts +- database-backed run graphs + +Priority: `P1` + +Reuse level: `Architecture model only` + +--- + +### 12. 
Permission evaluation chain + +Source: + +- `src/Tool/Permission/PermissionEvaluator.php` + +What it does well: + +- explicit check chain +- fail-closed default +- stage-based policy composition + +Current OpenCompany state: + +- `app/Services/AgentPermissionService.php` is domain-aware and correct for workspaces, agents, folders, channels, and approvals +- But its structure is more app-specific and less composable than the staged evaluator pattern + +What to reuse: + +- explicit evaluation pipeline +- clear deny/ask/allow stages +- central decision object + +What not to reuse: + +- local file path and shell command rules +- Guardian/Argus/Prometheus model semantics + +Priority: `P2` + +Reuse level: `Pattern reuse` + +--- + +### 13. Instruction discovery conventions + +Source: + +- `src/Agent/InstructionLoader.php` + +What it does well: + +- combines global, repo, and local instruction sources with defined precedence + +Why it matters for OpenCompany: + +- OpenCompany already has identity and instruction docs per agent +- The concept could extend to project imports, synced repos, or workspace knowledge packs + +Good reuse targets: + +- imported repository instruction files +- project-specific agent overlays +- workspace-level instruction inheritance + +Priority: `P2` + +Reuse level: `Concept only` + +--- + +## Low Value Or No Value Reuse + +These should stay in KosmoKrator. + +### 14. TUI and ANSI renderer stack + +Sources: + +- `src/UI/Tui/*` +- `src/UI/Ansi/*` + +Why not reuse: + +- OpenCompany is a web app +- the abstractions are clean, but the actual code is terminal-specific + +Possible exception: + +- only reuse naming or state-machine ideas for live agent dashboards + +Priority: `Skip` + +--- + +### 15. 
CLI shell and file tools + +Sources: + +- `src/Tool/Coding/*` + +Why not reuse: + +- these are for local filesystem editing and shell execution inside a coding agent +- OpenCompany's tool surface is domain tools, integrations, documents, channels, tables, and tasks + +Possible exception: + +- isolated pieces of patch or diff handling if OpenCompany grows a coding workspace product + +Priority: `Skip` + +--- + +### 16. Desktop install, self-update, PHAR, binaries + +Sources: + +- `install.sh` +- CLI release flow +- PHAR and static binary distribution logic + +Why not reuse: + +- unrelated to OpenCompany's deployment model + +Priority: `Skip` + +--- + +### 17. Terminal-first permission UX + +Sources: + +- permission prompts and CLI interaction flows + +Why not reuse: + +- OpenCompany already has approvals, database-backed permission records, and human-in-the-loop flows +- the underlying policy concepts may help, but the UX should remain web-native + +Priority: `Skip` + +--- + +## Concrete Reuse List + +This is the full list in one place. 
+ +### Reuse now + +- Shared provider and model metadata ownership +- Prompt frame splitting for prompt cache efficiency +- Tool result deduplication +- Output truncation with persisted full payloads + +### Reuse next + +- Context budgeting and pre-flight checks +- Compaction circuit breaker +- Protected runtime context +- Typed settings schema +- Subagent orchestration design + +### Reuse later + +- Skill system +- Instruction source precedence model +- Session reconstruction and resume ideas +- Permission evaluator pipeline pattern + +### Do not reuse directly + +- Symfony TUI renderer +- ANSI terminal renderer +- local shell execution tools +- local filesystem coding tools +- PHAR and binary release tooling +- terminal approval UX + +--- + +## Recommended Migration Order + +### Phase 1 + +- Consolidate provider/model metadata into `prism-relay` +- Make OpenCompany consume shared provider and model definitions +- Port prompt-frame splitting into OpenCompany's agent pipeline + +### Phase 2 + +- Add tool-result deduplication +- Add output truncation and persisted large-result storage +- Add protected context handling + +### Phase 3 + +- Expand OpenCompany compaction into a full context management pipeline +- Add budget snapshots, micro-pruning, and failure circuit breaking +- Move toward schema-driven runtime settings + +### Phase 4 + +- Design web-native subagent orchestration using KosmoKrator's swarm ideas +- Add richer dependency and concurrency controls on top of tasks and queues + +### Phase 5 + +- Introduce skills for workspaces, teams, or agents +- Introduce instruction layering for imported projects or external code contexts + +--- + +## Recommended First Phase We Can Start Soon + +If the goal is to start shipping reuse work immediately with low risk, the first phase should be: + +### Phase 1A: Prompt and Context Hygiene + +Bring over these first: + +- Prompt frame splitting from `src/LLM/PromptFrameBuilder.php` +- Tool result deduplication from 
`src/Agent/ToolResultDeduplicator.php` +- Output truncation from `src/Agent/OutputTruncator.php` + +Why this should be first: + +- small surface area +- no product UI changes required +- no queue architecture changes required +- immediate token and context efficiency wins +- low coupling to CLI-specific code + +OpenCompany target areas: + +- `app/Agents/OpenCompanyAgent.php` +- `app/Jobs/AgentRespondJob.php` +- `app/Services/Memory/*` + +Expected outcome: + +- smaller prompts +- fewer repeated tool payloads +- safer handling of oversized tool outputs +- lower model cost and fewer context-window failures + +### Phase 1B: Model Metadata Consolidation + +Start immediately after Phase 1A: + +- move toward shared provider/model metadata ownership in `prism-relay` +- reduce duplication between: + - `config/integrations.php` + - `app/Services/Memory/ModelContextRegistry.php` + - KosmoKrator's `src/LLM/ProviderCatalog.php` + - KosmoKrator's `config/models.yaml` + +Why this should be second: + +- strategically important +- unlocks cleaner provider UI and runtime behavior in both repos +- but touches more shared infrastructure than prompt hygiene does + +Expected outcome: + +- one source of truth for context windows, pricing, auth mode, defaults, and capabilities +- simpler provider setup and model resolution in OpenCompany + +### What Not To Include In First Phase + +Do not include these in the first phase: + +- subagent swarm orchestration +- permission system redesign +- skill system rollout +- session storage redesign +- TUI or CLI code + +Why not: + +- these are higher-risk and product-shaping changes +- they need architecture decisions, not just reuse work +- they will slow down the first useful delivery + +### Suggested Deliverables For The First Phase + +1. Add a prompt-splitting helper for OpenCompany system prompts +2. Add a tool-result dedupe pass before final LLM submission +3. Add large-output truncation plus persisted full-result storage +4. 
Add instrumentation around prompt size reduction and truncation frequency +5. Open a follow-up shared-infra task for provider/model metadata consolidation + +### Concrete Recommendation + +If we want the best first phase, start with: + +1. `PromptFrameBuilder` +2. `ToolResultDeduplicator` +3. `OutputTruncator` + +Then do provider/model catalog consolidation as the next phase. + +This gives the fastest path to measurable gains without dragging us into a large refactor. + +--- + +## What This Means In Practice + +OpenCompany should treat KosmoKrator as the stronger source of truth for: + +- agent runtime mechanics +- model metadata handling +- context management +- prompt hygiene +- subagent orchestration concepts + +OpenCompany should **not** treat KosmoKrator as the source of truth for: + +- UX +- storage model +- permissions UI +- shell and filesystem tool design + +The right strategy is: + +- extract shared infrastructure downward into shared packages +- port reusable runtime logic upward into OpenCompany +- leave terminal-specific product code behind diff --git a/docs/architecture/runtime-alignment-implementation-audit.md b/docs/architecture/runtime-alignment-implementation-audit.md new file mode 100644 index 0000000..cb100bc --- /dev/null +++ b/docs/architecture/runtime-alignment-implementation-audit.md @@ -0,0 +1,21 @@ +# Runtime Alignment Implementation Audit + +Date: 2026-04-09 +Status: Review complete — all findings tracked in Plane. 
+ +## Findings + +All findings from this audit are now tracked as issues in the [OpenCompany Plane project](https://plane.gingermedia.biz/kosmokrator/projects/ceaf5d22-612a-42bf-9cc8-0dac054cdf0c/issues/): + +| Issue | Finding | Severity | +|-------|---------|----------| +| OC-1 | ~~Prompt caching depends on ignored `vendor` patches~~ — **Fixed**: `CachingPrismGateway` in `prism-relay`, vendor patches reverted | ~~High~~ | +| OC-3 | ~~`planPromptCache()` never called in request flow~~ — **Fixed**: called via `CachingPrismGateway` for all providers | ~~High~~ | +| OC-4 | `ContextBudget` undercounts retry context pressure | High | +| OC-5 | `ModelContextRegistry` regressed prefix-style admin overrides | Medium | +| OC-6 | Durable-memory extraction re-logs same facts on later compactions | Medium | + +## Verification Notes + +- The workspace requires a filtered `APP_PACKAGES_CACHE` during tests because several `opencompanyapp/integration-*` packages are absent from `vendor`. +- Prompt-cache metrics have not been verified end-to-end through production observability. diff --git a/docs/external-channel-sync.md b/docs/external-channel-sync.md index 14c590f..8c095da 100644 --- a/docs/external-channel-sync.md +++ b/docs/external-channel-sync.md @@ -2,17 +2,17 @@ Making agents full community participants — not just chatbots. 
-## Implementation Status (February 2026) +## Implementation Status | Phase | Telegram | Discord | |-------|----------|---------| | Phase 1: External message ID tracking | **Done** | N/A yet | -| Phase 2: Bidirectional sync (edit/delete/pin/react) | **Done** | Not started | -| Phase 3: External channel discovery | **Done** (monitored channels) | Not started | +| Phase 2: Bidirectional sync (edit/delete/pin/react) | **Done** | Not started (OC-44) | +| Phase 3: External channel discovery | **Done** (monitored channels) | Not started (OC-44) | | Phase 4: Message search | **Done** | Done (DB-level) | -**Key implementation files:** -- `app/Listeners/SyncToTelegram.php` — Consolidated listener (replaces ForwardMessageToTelegram) handling message send, edit, delete, pin, and reaction sync +**Key implementation files (Telegram — complete):** +- `app/Listeners/SyncToTelegram.php` — Consolidated listener handling message send, edit, delete, pin, and reaction sync - `app/Events/MessageEdited.php`, `MessageDeleted.php`, `MessagePinned.php`, `MessageReactionAdded.php` — Sync events - `app/Services/TelegramService.php` — Platform API methods (edit, delete, pin, react) - `app/Agents/Tools/Chat/ManageMessage.php` — Agent tool with edit action + sync indicator @@ -20,318 +20,7 @@ Making agents full community participants — not just chatbots. - `app/Agents/Tools/Chat/DiscoverExternalChannels.php` — Browse external platform channels - `database/migrations/2026_02_14_200001_add_external_message_id_to_messages_table.php` — External ID tracking -**What's left:** Discord sync listener (`SyncToDiscord`), Discord channel discovery via REST API, Discord webhook controller for inbound events. - ---- - -## The Problem - -Agents can send messages to external channels (Telegram, Discord) and that's it. Reactions, pins, edits, and deletes are workspace-only — they never sync to the external platform. 
Agents can't browse Discord server channels, can't react to a Telegram message, can't edit their own response after sending. They're chatbots, not community members. - -## The Vision - -Agents should feel like **real team members** on Discord and Telegram — browsing channels, reacting to messages, editing responses, pinning important content, moving between channels strategically. The workspace is the brain; external platforms are the hands. - ---- - -## Current State - -| Capability | Internal channels | External (Telegram) | External (Discord) | -|---|---|---|---| -| Send messages | Yes | Yes (auto-sync) | Yes (auto-forwards) | -| Read messages | Yes | Yes (from DB) | Yes (from DB) | -| Edit messages | Yes (`manage_message`) | **Yes (synced)** | No | -| Add reactions | Yes | **Yes (synced)** | Not synced | -| Pin messages | Yes | **Yes (synced)** | Not synced | -| Delete messages | Yes | **Yes (synced)** | Not synced | -| Browse channels | Yes (`list_channels`) | **Yes** (`discover_external_channels`) | DB-stored only | -| Search messages | **Yes** (`search_messages`) | **Yes** | **Yes** | - -### Root cause (now resolved for Telegram) - -~~No external message ID tracking. When a message is sent TO Telegram, the returned `message_id` is discarded. When a message comes FROM Telegram, its `message_id` is used for dedup but never stored. Without this mapping, the system can't target a specific message on the external platform for edit/react/pin/delete.~~ - -**Resolved:** The `external_message_id` column on the `messages` table now tracks platform message IDs for both inbound and outbound messages. Telegram sync is fully operational. 
- -### Current agent tools (chat group) - -| Tool | What it does | External support | -| ---- | ------------ | ---------------- | -| `send_channel_message` | Post message to any channel | Yes — auto-syncs to Telegram, auto-forwards to Discord | -| `read_channel` | Read recent messages, threads, pinned | Yes — reads from workspace DB (includes external message IDs) | -| `list_channels` | List accessible channels by type | Yes — shows external channels from DB | -| `manage_message` | Edit, delete, pin, add/remove reactions | **Telegram: fully synced** — Discord: workspace DB only | -| `search_messages` | Full-text search across channels | Yes — searches all channels including external | -| `discover_external_channels` | Browse external platform channels | **Telegram: implemented** — Discord: not yet | - ---- - -## What Agents See Today (Exact Tool Output) - -### `list_channels` — Can the agent tell channels apart? - -**Yes.** External channels are clearly marked with `type: external` and `provider: {name}`. They also lack the `#` prefix that internal channels have. 
- -``` -Workspace channels: -- #general (id: 9a3f..., type: public, 15 members) -- #engineering (id: 2b7c..., type: private, 8 members) -- Telegram Support, provider: telegram (id: 4d1e..., type: external, 3 members, provider: telegram) -- founders-chat, provider: telegram (id: 7f2a..., type: external, 5 members, provider: telegram) -- discord-general, provider: discord (id: 8c5b..., type: external, 42 members, provider: discord) -``` - -The agent can filter by type: - -```json -{ "type": "external" } -``` - -``` -Workspace channels: -- Telegram Support, provider: telegram (id: 4d1e..., type: external, 3 members, provider: telegram) -- founders-chat, provider: telegram (id: 7f2a..., type: external, 5 members, provider: telegram) -- discord-general, provider: discord (id: 8c5b..., type: external, 42 members, provider: discord) -``` - -**What the agent CAN tell:** -- Which channels are internal (`#` prefix, `type: public/private`) -- Which are external (`type: external`, no `#` prefix) -- Which provider each external channel belongs to (`provider: telegram`, `provider: discord`) -- How many members are in each channel - -**What the agent CANNOT tell:** -- Activity level (no message count or last activity timestamp) -- Unread count -- Which external channels exist on the platform but aren't monitored yet (see Phase 3) - -### `read_channel` — What messages look like to the agent - -```json -{ "channelId": "4d1e...", "action": "recent_messages", "limit": 5 } -``` - -``` -Recent messages in Telegram Support: -[2025-02-11 09:15] Alice: Hey, I'm having trouble with my subscription -[2025-02-11 09:16] Atlas: Hi Alice! I'd be happy to help. Can you tell me what error you're seeing? -[2025-02-11 09:18] Alice: It says "payment method declined" but my card works fine -[2025-02-11 09:19] Atlas: Let me check your account. One moment... -[2025-02-11 09:20] Atlas: I see the issue — your card's 3D Secure verification expired. I've reset it. 
-``` - -**What the agent CAN tell:** -- Who said what, with timestamps -- The conversation flow and context -- Channel name (from the header line) - -**What the agent CANNOT tell:** -- **Message IDs** — not shown. The agent has no way to reference a specific message for reactions, pins, or edits. This is a critical gap (see below). -- **Source/origin** — was Alice's message typed in Telegram or in the workspace UI? The agent can't tell. Both look identical. -- **Reactions on messages** — existing reactions are not displayed -- **Whether a message is pinned** — not indicated in the output - -**Thread reading:** - -```json -{ "channelId": "4d1e...", "action": "thread", "messageId": "msg-uuid-here" } -``` - -``` -Thread for message by Alice: -[2025-02-11 09:15] Alice: Hey, I'm having trouble with my subscription ---- Replies (2) --- -[2025-02-11 09:16] Atlas: Hi Alice! I'd be happy to help. -[2025-02-11 09:18] Alice: It says "payment method declined" -``` - -### `send_channel_message` — Minimal feedback - -```json -{ "channelId": "4d1e...", "content": "Your subscription has been renewed successfully!" } -``` - -``` -Message sent successfully to channel 'Telegram Support'. -``` - -The agent gets no message ID back — so it can't immediately edit or pin the message it just sent. - -### `manage_message` — Needs message IDs it can't get - -```json -{ "messageId": "???", "action": "add_reaction", "emoji": "👍" } -``` - -``` -Reaction added. -``` - -**The broken workflow:** `manage_message` requires a `messageId` parameter, but `read_channel` never shows message IDs. Today, agents can only use `manage_message` on messages whose IDs they received through other means (e.g., from an event payload in their task context). They cannot read a channel and then react to something they read — the IDs are invisible. 
- ---- - -## What Agents Would See After Enhancement - -### Enhanced `read_channel` (after Phase 1) - -Message IDs and source indicators become visible: - -``` -Recent messages in Telegram Support: -[msg:a1b2c3] [2025-02-11 09:15] Alice (via telegram): Hey, I'm having trouble with my subscription -[msg:d4e5f6] [2025-02-11 09:16] Atlas: Hi Alice! I'd be happy to help. Can you tell me what error you're seeing? -[msg:g7h8i9] [2025-02-11 09:18] Alice (via telegram): It says "payment method declined" but my card works fine -[msg:j0k1l2] [2025-02-11 09:19] Atlas: Let me check your account. One moment... -[msg:m3n4o5] [2025-02-11 09:20] Atlas: I see the issue — your card's 3D Secure verification expired. I've reset it. 📌 -``` - -**New information visible:** -- `[msg:a1b2c3]` — short message ID (first 6 chars of UUID) for easy referencing -- `(via telegram)` — source indicator, only shown for external-origin messages -- `📌` — pinned indicator -- Agents can now react: `{ "messageId": "a1b2c3...", "action": "add_reaction", "emoji": "👍" }` - -### Enhanced `send_channel_message` (after Phase 1) - -Returns the message ID so the agent can immediately reference it: - -``` -Message sent to 'Telegram Support' (msg:p6q7r8). -``` - -### Enhanced `manage_message` (after Phase 2) - -**Edit action (new):** - -```json -{ "messageId": "m3n4o5...", "action": "edit", "content": "Fixed: your 3D Secure verification was expired. I've reset it — try again now." } -``` - -``` -Message edited. Synced to telegram. -``` - -The edit appears in both the workspace UI AND in the Telegram chat. - -**Reaction with sync:** - -```json -{ "messageId": "a1b2c3...", "action": "add_reaction", "emoji": "👍" } -``` - -``` -Reaction added. Synced to telegram. -``` - -The thumbs up appears natively in Telegram on Alice's message. 
- -### `discover_external_channels` (Phase 3) - -```json -{ "provider": "discord", "action": "list_server_channels" } -``` - -``` -Discord server channels (FounderMode Community): - #general (id: 1234567890, status: monitoring, 1,240 messages) - #introductions (id: 1234567891, status: monitoring, 89 messages) - #support (id: 1234567892, status: not monitored) - #hiring (id: 1234567893, status: not monitored) - #off-topic (id: 1234567894, status: not monitored) - #announcements (id: 1234567895, status: monitoring, 45 messages) -``` - -```json -{ "provider": "discord", "action": "join_channel", "channelId": "1234567892" } -``` - -``` -Now monitoring Discord channel #support. Messages will appear in workspace. -``` - -### `search_messages` (Phase 4) - -```json -{ "query": "payment declined", "limit": 5 } -``` - -``` -Found 3 messages matching "payment declined": - -1. [msg:g7h8i9] [Telegram Support] Alice (2025-02-11 09:18): - "It says 'payment method declined' but my card works fine" - -2. [msg:x9y0z1] [#support] @dave (2025-02-09 15:42): - "Getting payment declined errors on the checkout page..." - -3. [msg:w8v7u6] [discord-general] Bob (via discord) (2025-02-05 11:30): - "Has anyone else had payment declined issues recently?" -``` - ---- - -## End-to-End Example: Agent as Community Manager - -Here's a realistic scenario showing an agent using multiple tools together: - -**1. Agent checks its channels:** -``` -→ list_channels(type: "external") - -Workspace channels: -- founders-chat, provider: telegram (id: 7f2a..., type: external, 5 members, provider: telegram) -- discord-general, provider: discord (id: 8c5b..., type: external, 42 members, provider: discord) -``` - -**2. Agent reads recent Discord activity:** -``` -→ read_channel(channelId: "8c5b...", limit: 10) - -Recent messages in discord-general: -[msg:aa1122] [2025-02-11 14:01] Sarah (via discord): Does anyone know how to set up webhooks? 
-[msg:bb3344] [2025-02-11 14:03] Mike (via discord): I think there's a guide somewhere -[msg:cc5566] [2025-02-11 14:05] Sarah (via discord): Can't find it 😕 -``` - -**3. Agent searches for context:** -``` -→ search_messages(query: "webhook guide") - -Found 1 message matching "webhook guide": -1. [msg:zz9900] [#announcements] System (2025-01-15 10:00): - "New webhook guide published: https://docs.example.com/webhooks" -``` - -**4. Agent responds with the link:** -``` -→ send_channel_message(channelId: "8c5b...", content: "Hey Sarah! Here's the webhook guide: https://docs.example.com/webhooks — let me know if you have questions!") - -Message sent to 'discord-general' (msg:dd7788). -``` -*This message appears in Discord as a bot message.* - -**5. Agent reacts to Sarah's original question:** -``` -→ manage_message(messageId: "aa1122...", action: "add_reaction", emoji: "✅") - -Reaction added. Synced to discord. -``` -*The checkmark appears natively on Sarah's message in Discord.* - -**6. Agent discovers a new channel to monitor:** -``` -→ discover_external_channels(provider: "discord", action: "list_server_channels") - -Discord server channels (FounderMode Community): - #general (id: 8c5b..., status: monitoring, 1,240 messages) - #support (id: 9d6c..., status: not monitored) - ... - -→ discover_external_channels(provider: "discord", action: "join_channel", channelId: "9d6c...") - -Now monitoring Discord channel #support. Messages will appear in workspace. -``` +**What's left:** Discord sync listener (`SyncToDiscord`), Discord channel discovery via REST API, Discord webhook controller for inbound events. Tracked as OC-44 in Plane. --- @@ -343,11 +32,11 @@ Now monitoring Discord channel #support. Messages will appear in workspace. 
│ (provider-agnostic, as today) │ │ │ │ send_channel_message │ - │ edit_message (NEW) │ │ manage_message (ENHANCED) │ │ read_channel │ - │ list_channels (ENHANCED) │ - │ search_messages (NEW) │ + │ list_channels │ + │ search_messages │ + │ discover_external_channels │ └──────────┬──────────────────────┘ │ ┌──────────▼──────────────────────┐ @@ -378,265 +67,29 @@ Now monitoring Discord channel #support. Messages will appear in workspace. --- -## Phase 1: External Message ID Tracking - -**Prerequisite for everything else.** Without knowing which workspace message maps to which Telegram/Discord message, you can't edit, react to, pin, or delete it on the external platform. - -### What changes - -1. **Migration**: Add `external_message_id` column to `messages` table (nullable string) - - Combined with `channel.external_provider`, this uniquely identifies the external message - -2. **Store inbound IDs**: `TelegramWebhookController::handleMessage()` already reads `$message['message_id']` for dedup. Store it on the created Message: - - ```php - $msg = Message::create([... - 'external_message_id' => (string) $telegramMessageId, - ]); - ``` - -3. **Store outbound IDs**: `ForwardMessageToTelegram` must capture the returned message ID from `sendMessage()` and store it: - - ```php - $result = $telegram->sendMessage($chatId, $text); - $message->update(['external_message_id' => (string) $result['message_id']]); - ``` - -4. **TelegramService::sendMessage()**: Currently returns void. Change to return the API response (which includes `message_id`). Same for `sendPhoto()`, `sendDocument()`. - -5. **Enhance ReadChannel output**: Include message IDs and source indicators in the output so agents can reference specific messages. Currently `ReadChannel.php` formats messages as `[timestamp] Author: content` — change to `[msg:id] [timestamp] Author (via source): content`. Also include pinned indicator. - -6. 
**Enhance SendChannelMessage output**: Return the message ID in the success response so the agent can immediately reference the message it just sent. - -### Files to modify - -| File | Change | -| ---- | ------ | -| New migration | Add `external_message_id` to `messages` | -| `app/Models/Message.php` | Add to `$fillable` | -| `app/Services/TelegramService.php` | Return response from `sendMessage()` etc. | -| `app/Listeners/ForwardMessageToTelegram.php` | Store returned message ID | -| `app/Http/Controllers/Api/TelegramWebhookController.php` | Store inbound message ID | -| `app/Agents/Tools/Chat/ReadChannel.php` | Add message IDs, source indicators, pin markers to output | -| `app/Agents/Tools/Chat/SendChannelMessage.php` | Return message ID in success response | - -Same pattern applies to Discord when implemented — `DiscordService::sendMessage()` returns the message ID, `ForwardMessageToDiscord` stores it, `DiscordWebhookController` stores inbound IDs. - ---- - -## Phase 2: Bidirectional Sync Layer - -### Consolidated listener pattern - -One listener per platform handles ALL sync types. Existing `ForwardMessageToTelegram` gets absorbed into `SyncToTelegram`: - -```php -class SyncToTelegram implements ShouldQueue -{ - public function subscribe(Dispatcher $events): array - { - return [ - MessageSent::class => 'handleMessageSent', - MessageEdited::class => 'handleMessageEdited', - MessageDeleted::class => 'handleMessageDeleted', - MessagePinned::class => 'handleMessagePinned', - MessageReactionAdded::class => 'handleReactionAdded', - ]; - } - - public function handleMessageSent(MessageSent $event): void - { - // Current ForwardMessageToTelegram logic moves here - } - - public function handleReactionAdded(MessageReactionAdded $event): void - { - // Look up external_message_id → call setMessageReaction - } - - // ... etc -} -``` - -Same pattern for `SyncToDiscord`. All platform-specific logic in one file per platform. - -### 2a. 
Reaction sync - -**Outbound** (workspace → platform): -- `ManageMessage` fires `MessageReactionAdded` event after adding a reaction -- `SyncToTelegram::handleReactionAdded()` calls Telegram `setMessageReaction` API -- `SyncToDiscord::handleReactionAdded()` calls Discord `PUT /channels/{id}/messages/{id}/reactions/{emoji}/@me` -- Requires `external_message_id` to target the correct message - -**Inbound** (platform → workspace): -- Telegram: `message_reaction` update type → webhook controller creates `MessageReaction` in DB -- Discord: `messageReactionAdd` Gateway event → sidecar forwards → controller creates `MessageReaction` +## Discord Implementation Plan -**New API methods needed:** - -```php -// TelegramService -public function setMessageReaction(string $chatId, int $messageId, string $emoji): array - -// DiscordService -public function addReaction(string $channelId, string $messageId, string $emoji): void -public function removeReaction(string $channelId, string $messageId, string $emoji): void -``` - -### 2b. Edit sync - -**New `edit` action in ManageMessage** (or separate `edit_message` tool): -- Agent provides `messageId` + `newContent` -- Updates Message content in DB -- Fires `MessageEdited` event -- `SyncToTelegram::handleMessageEdited()` calls `editMessageText()` -- `SyncToDiscord::handleMessageEdited()` calls `PATCH /channels/{id}/messages/{id}` - -### 2c. Pin sync - -When `ManageMessage` pins a message: -- Fires `MessagePinned` event -- `SyncToTelegram::handleMessagePinned()` calls `pinChatMessage` API -- `SyncToDiscord::handleMessagePinned()` calls `PUT /channels/{id}/pins/{message_id}` - -### 2d. 
Delete sync - -When `ManageMessage` deletes a message: -- Fires `MessageDeleted` event -- `SyncToTelegram::handleMessageDeleted()` calls `deleteMessage` API -- `SyncToDiscord::handleMessageDeleted()` calls `DELETE /channels/{id}/messages/{message_id}` - -### Files - -| File | Purpose | -|------|---------| -| `app/Events/MessageEdited.php` | New event | -| `app/Events/MessageDeleted.php` | New event | -| `app/Events/MessagePinned.php` | New event | -| `app/Events/MessageReactionAdded.php` | New event | -| `app/Listeners/SyncToTelegram.php` | Replaces `ForwardMessageToTelegram`, handles all sync types | -| `app/Listeners/SyncToDiscord.php` | Same pattern for Discord | -| `app/Agents/Tools/Chat/ManageMessage.php` | Fire new events after each action; add `edit` action | -| `app/Services/TelegramService.php` | Add `setMessageReaction`, `pinChatMessage`, `deleteMessage` | -| `app/Services/DiscordService.php` | Add `addReaction`, `removeReaction`, `editMessage`, `pinMessage`, `deleteMessage` | - ---- - -## Phase 3: External Channel Discovery - -Agents should be able to **browse a Discord server's channels** — not just ones already stored in the DB from received messages — and decide to monitor new ones. - -### New tool: `discover_external_channels` - -``` -Parameters: - - provider: 'discord' | 'telegram' - - action: 'list_server_channels' | 'join_channel' | 'leave_channel' - - channelId: (for join/leave — the external platform's channel ID) -``` - -### How it works - -**`list_server_channels`**: Calls Discord REST API `GET /guilds/{guild_id}/channels` → returns all text channels in the server, marking which ones are already being monitored (have a workspace Channel record). 
- -Example agent output: -``` -Discord server channels: - #general (id: 123456, monitoring: yes, 340 messages) - #support (id: 123457, monitoring: no) - #hiring (id: 123458, monitoring: no) - #announcements (id: 123459, monitoring: yes, 12 messages) -``` - -**`join_channel`**: Creates a workspace Channel record for a Discord channel that isn't in the DB yet. The sidecar is already forwarding ALL events — Laravel just wasn't creating a Channel for messages in unmonitored channels. After joining, messages from that channel get processed. - -**`leave_channel`**: Marks a Channel as inactive / stops processing messages from it. Does NOT delete history. - -This lets an agent say: *"I see there's a #support channel with unanswered questions. Let me start monitoring it."* - -### Files +Discord sync follows the same consolidated listener pattern as Telegram. Files to create or extend: | File | Purpose | |------|---------| -| `app/Agents/Tools/Chat/DiscoverExternalChannels.php` | New tool | -| `app/Services/DiscordService.php` | Add `listGuildChannels()` | -| `app/Agents/Tools/ToolRegistry.php` | Register in chat group | - ---- - -## Phase 4: Message Search - -Agents need to research conversation history — essential for a community manager that needs context before responding. +| `app/Listeners/SyncToDiscord.php` | Handles all sync types (send, edit, delete, pin, react) | +| `app/Services/DiscordService.php` | Add `addReaction`, `removeReaction`, `editMessage`, `pinMessage`, `deleteMessage`, `listGuildChannels` | -### New tool: `search_messages` - -``` -Parameters: - - query: search string (required) - - channelId: scope to channel (optional) - - authorId: filter by author (optional) - - limit: max results, default 20 (optional) -``` - -Uses SQL full-text search or `LIKE` on `messages.content`. Returns matching messages with channel name, author, timestamp, and a content snippet with the match highlighted. - -Example agent output: -``` -Found 3 messages matching "pricing": -1. 
[#general] @alice (2025-05-10 14:23): "What's the pricing for the pro plan? I saw..." -2. [#support] @bob (2025-05-08 09:15): "Updated pricing page is live, check..." -3. [#announcements] @system (2025-05-01 12:00): "New pricing tiers announced..." -``` - -### Files - -| File | Purpose | -|------|---------| -| `app/Agents/Tools/Chat/SearchMessages.php` | New tool | -| `app/Agents/Tools/ToolRegistry.php` | Register in chat group | +The sidecar architecture for Discord is documented in [discord.md](discord.md). --- -## Phasing Summary - -| Phase | What | Unlocks | Depends on | -|-------|------|---------|------------| -| **1** | External message ID tracking | Edit, react, pin, delete on external platforms | Nothing | -| **2** | Bidirectional sync events + consolidated listeners | Agent reactions/pins/edits appear on Discord/Telegram | Phase 1 | -| **3** | External channel discovery | Agents browse and join Discord channels proactively | Nothing | -| **4** | Message search | Agents research conversation history | Nothing | - ---- +## What Full Sync Enables -## What This Enables +With all phases complete for a platform, an agent can: -With all phases complete, an agent can: - -1. **Browse** all Discord channels → *"There are 12 channels in the server. #general is most active, #support has 3 unanswered questions."* +1. **Browse** all channels → *"There are 12 channels. #support has 3 unanswered questions."* 2. **Join** a new channel → *"I'll start monitoring #support to help answer questions."* -3. **React** to a user's message with a thumbs up → the reaction appears natively in Discord/Telegram +3. **React** to a user's message → the reaction appears natively on the platform 4. **Pin** an important announcement → pinned in both workspace and platform -5. **Edit** its own previous response → edited in Discord/Telegram too -6. **Search** past conversations → *"Last week, user X asked about pricing. Here's what was discussed..."* -7. 
**Move between channels** strategically → *"The conversation in #general is about our roadmap. Let me check #product-updates for context, then respond."* - -The agent becomes a **real community participant** — not a bot stuck in one channel waiting for pings. - ---- - -## Updated chat tool group (after all phases) +5. **Edit** its own previous response → edited on the platform too +6. **Search** past conversations → *"Last week, user X asked about pricing."* +7. **Move between channels** strategically -```php -'chat' => [ - 'tools' => [ - 'send_channel_message', - 'read_channel', - 'list_channels', - 'manage_message', // enhanced: edit action, fires sync events - 'discover_external_channels', // NEW - 'search_messages', // NEW - ], - 'label' => 'send, read, list, manage, discover, search', - 'description' => 'Channel messaging with bidirectional external sync (Telegram, Discord)', -], -``` +The agent becomes a **real community participant** — not a bot stuck in one channel. diff --git a/docs/planning/implementation-todo.md b/docs/planning/implementation-todo.md deleted file mode 100644 index 8e01a29..0000000 --- a/docs/planning/implementation-todo.md +++ /dev/null @@ -1,1968 +0,0 @@ -# OpenCompany Agent System - Complete Implementation Todo - -> **Comprehensive hierarchical task list for implementing OpenClaw-style agent system** -> -> Legend: `[x]` = Complete, `[ ]` = Todo, `[~]` = In Progress -> Dependencies shown as `← depends on: [task-id]` - ---- - -## Technology Stack - -> See [Technology Decisions](../architecture/technology-decisions.md) for detailed comparison and rationale. 
- -| Component | Choice | Reason | -|-----------|--------|--------| -| **AI Framework** | **Laravel AI SDK (`laravel/ai`)** | Official first-party, full multimodal, comprehensive testing | - -**Core Packages:** -- `laravel/ai` - Official Laravel AI SDK (agents, tools, embeddings, multimodal) - -**Optional Packages:** -- `laravel/mcp` - Expose OpenCompany as MCP server for external AI clients - ---- - -## Phase 0: Package Installation & Setup - -> **Why:** Before building the agent system, we need the core AI package installed. Laravel AI SDK provides official first-party LLM integration. Laravel queues handle async task processing. - -### 0.1 Install Core Packages -- [x] **0.1.1** Install Laravel AI SDK — ✅ `laravel/ai` v0.1.2 in composer.json (also `prism-php/prism` installed) - - **What:** Official first-party Laravel package for AI/LLM integration with multiple providers - - **Why:** Laravel AI SDK is the official package from the Laravel team. It supports agents, tools, streaming, embeddings, image generation, audio, and comprehensive testing utilities. - - **Context:** We chose Laravel AI SDK over Prism (community package) for its first-party support, multimodal capabilities, and built-in testing. - ```bash - composer require laravel/ai - ``` - -- [x] **0.1.2** Publish AI SDK config — ✅ config/ai.php exists - - **What:** Creates `config/ai.php` with provider settings - - **Why:** Need to configure API keys and provider-specific settings. Also enables adding custom providers like GLM via OpenAI-compatible endpoint. - ```bash - php artisan vendor:publish --provider="Laravel\Ai\AiServiceProvider" - ``` - -- [x] **0.1.3** Configure providers in `config/ai.php` — ✅ DynamicProviderResolver + IntegrationSettings handle provider config - - **What:** Set up API credentials for all LLM providers - - **Why:** Anthropic/Claude is our primary LLM for agent tasks. OpenAI, Gemini, Groq, xAI are available as alternatives/fallbacks. 
- - **Context:** GLM/Zhipu AI uses OpenAI-compatible endpoint with custom base URL. Provider failover is built-in. - - Set `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, etc. in `.env` - -### 0.2 Verify Setup -- [x] **0.2.1** Test Laravel AI SDK agent ← depends on: [0.1.3] — ✅ agents operational with OpenCompanyAgent + AgentRespondJob - - **What:** Simple test to verify provider APIs are working - - **Why:** Catch configuration errors early before building dependent features. - - **Context:** Should return a response and log token usage. - ```php - use function Laravel\Ai\agent; - - $response = agent( - instructions: 'You are a helpful assistant.', - )->prompt('Hello, world!'); - ``` - -### 0.3 Optional: Install Extensions -- [ ] **0.3.1** Install Laravel MCP — NOT built (MCP Client integration exists for connecting TO external servers, but not exposing OpenCompany AS a server) - - **What:** Expose OpenCompany workspace as MCP server for external AI clients - - **Why:** Allows Claude Desktop, VS Code Copilot, and other MCP-compatible tools to interact with OpenCompany data. - - **Context:** Provides tools (search_documents, create_task, send_message) and resources (documents, agent configs) via MCP protocol. - ```bash - composer require laravel/mcp - ``` - ---- - -## Phase 1: Database Foundation - -> **Why:** The database schema is the foundation of the agent system. Each table maps to a core concept from OpenClaw's architecture, translated to business-friendly naming. - -### 1.1 Core Agent Tables -- [x] **1.1.1** Create `agent_configurations` migration — ✅ superseded: agent identity stored in Document-based files per agent; agent fields on `users` table - - **What:** Stores the core identity and personality of each AI agent - - **Why:** Agents need persistent personality (SOUL.md), instructions (AGENTS.md), and identity metadata. This is what makes each agent unique and consistent across sessions. - - **Context:** In OpenClaw, these are markdown files in the workspace. 
We store them in DB for easier management via UI. - - Fields: `id`, `user_id` (FK), `personality`, `instructions`, `identity`, `tool_notes`, `created_at`, `updated_at` - - `personality` = TEXT (markdown, SOUL.md equivalent) - Agent's tone, boundaries, operating principles - - `instructions` = TEXT (markdown, AGENTS.md equivalent) - Operating instructions, memory guidelines, skills - - `identity` = JSON (`{name, emoji, type, avatar, description}`) - Visual identity for UI - - `tool_notes` = TEXT (TOOLS.md equivalent) - Environment-specific tool notes (SSH hosts, device nicknames) - -- [x] **1.1.2** Create `agent_capabilities` migration ← depends on: [1.1.1] — ✅ superseded by `agent_permissions` table + AgentPermission model - - **What:** Junction table linking agents to their enabled capabilities/tools - - **Why:** Different agents need different tools. A code assistant needs git/file access, while a research agent needs web search. Per-agent capability control enables safe, scoped tool access. - - **Context:** This enables the "capabilities" tab in the agent settings UI where users can toggle tools on/off. - - Fields: `id`, `agent_config_id` (FK), `capability_id` (FK), `enabled`, `requires_approval`, `notes`, `created_at` - -- [x] **1.1.3** Create `capabilities` migration (master list) — ✅ superseded by ToolRegistry + AgentPermissionService (no separate capabilities table needed) - - **What:** Master list of all available tools/capabilities in the system - - **Why:** Centralizes tool definitions so new tools can be added system-wide and assigned to agents. Defines default approval requirements per tool type. - - **Context:** Seeded with common tools. Each has an icon for UI display and category for grouping. 
- - Fields: `id`, `name`, `description`, `icon`, `category`, `default_enabled`, `default_requires_approval`, `created_at` - - Seed with: code_execution, file_operations, git_operations, api_requests, database_access, production_deployment - -- [x] **1.1.4** Create `agent_settings` migration ← depends on: [1.1.1] — ✅ superseded by fields on `users` table (behavior_mode, brain, sleeping_until, etc.) - - **What:** Runtime behavior settings for each agent (how autonomous, cost limits, when to reset) - - **Why:** Different use cases need different autonomy levels. A production deployment agent should be strict (require approval for everything), while a dev assistant can be more autonomous. - - **Context:** The OpenClaw fields enable sophisticated execution control - allowlisting commands, reserving context space, auto-pruning old data. - - Fields: `id`, `agent_config_id` (FK), `behavior_mode` (enum: autonomous/supervised/strict), `cost_limit`, `reset_policy` (JSON), `created_at`, `updated_at` - - `reset_policy` = `{mode: 'daily'|'idle'|'manual', dailyHour?: number, idleMinutes?: number}` - - **OpenClaw fields (execution control):** - - `security_mode` enum: deny/allowlist/full (default: allowlist) - Controls which commands can execute - - `ask_mode` enum: off/on-miss/always (default: on-miss) - When to prompt user for approval - - `reserve_tokens` INTEGER (default: 16384) - Tokens reserved for compaction operations - - `reserve_tokens_floor` INTEGER (default: 20000) - Minimum safety floor for reserves - - `keep_recent_tokens` INTEGER (default: 20000) - Tokens to keep after compaction - - `pruning_ttl_minutes` INTEGER (default: 5) - How long before old tool results are pruned - - `auto_allow_skills` BOOLEAN (default: true) - Auto-allow trusted tool binaries (jq, grep, etc.) 
- - `soft_threshold_tokens` INTEGER (default: 4000) - Buffer before triggering memory flush - -- [x] **1.1.5** Update `capabilities` migration with tool kind — ✅ superseded: tool classification handled by ToolRegistry APP_GROUPS - - **What:** Classifies each tool by its operation type (read/edit/delete/execute/etc.) - - **Why:** Enables intelligent approval rules - auto-approve reads but require approval for deletes. Different risk levels for different operation types. - - **Context:** OpenClaw uses `inferToolKind()` to classify tools. We store it in DB for faster lookup. - - `kind` enum: read/edit/delete/move/search/execute/fetch/other (default: other) - -### 1.2 Memory & Session Tables - -> **Why:** Agents need persistent memory across conversations. Sessions track the current conversation, while memories persist facts and learnings long-term. - -- [x] **1.2.1** Create `agent_sessions` migration ← depends on: [1.1.1] — ✅ Superseded: conversations are channel-based (messages table + ChannelConversationLoader). Compaction tracked in `conversation_summaries` table. - -- [x] **1.2.2** Create `agent_session_messages` migration ← depends on: [1.2.1] — ✅ Superseded: messages stored in `messages` table per channel. Loaded by ChannelConversationLoader. - -- [x] **1.2.3** Create `agent_memories` migration ← depends on: [1.1.1] — ✅ Superseded: LTM stored as Documents in `agents/{slug}/memory/YYYY-MM-DD.md`, chunked into `document_chunks` with pgvector embeddings. Managed by SaveMemory/RecallMemory tools. - -- [x] **1.2.4** Create `agent_memory_daily_logs` migration ← depends on: [1.1.1] — ✅ Superseded: daily logs ARE the memory documents (`agents/{slug}/memory/YYYY-MM-DD.md`). Created by SaveMemory tool via AgentDocumentService. 
- -- [ ] **1.2.5** Create `agent_tool_allowlist` migration ← depends on: [1.1.1] — Not built (low priority: ApprovalWrappedTool handles tool approval; per-command allowlists not needed yet) - -### 1.3 Subagent Tables - -> **Why:** Agents need to spawn other agents for complex tasks. A code review agent might spawn a testing agent. These tables control who can spawn whom and track the parent-child relationships. - -- [x] **1.3.1** Create `subagent_spawn_permissions` migration ← depends on: [1.1.1] — ✅ Superseded: inter-agent communication uses `contact_agent` tool with ask/delegate/notify patterns. No formal spawn permissions table needed — agent permissions managed via AgentPermissionService. - -- [x] **1.3.2** Create `subagent_runs` migration ← depends on: [1.3.1] — ✅ Superseded: agent task tracking uses the `tasks` + `task_steps` tables. Contact between agents creates tasks visible in the Tasks UI. - -### 1.4 Run All Migrations - -> **Why:** Execute all database changes to create the foundation. Must run before creating models or any dependent code. - -- [x] **1.4.1** Run `php artisan migrate` ← depends on: [1.1.1-1.3.2] — ✅ 60+ migrations exist and run successfully - - **What:** Execute all migration files to create tables - - **Why:** Database must exist before models can query it. - -- [x] **1.4.2** Verify all tables created correctly — ✅ all tables operational - - **What:** Check that all tables, indexes, and constraints exist - - **Why:** Catch any migration errors early. Use `php artisan migrate:status` and check foreign keys. - -- [x] **1.4.3** Seed capabilities table with default capabilities — ✅ superseded: ToolRegistry provides capability list dynamically - - **What:** Populate the `capabilities` table with our 6 default tools - - **Why:** Agents need capabilities to choose from. These are system-wide definitions used by all agents. 
- - **Context:** Default capabilities: code_execution, file_operations, git_operations, api_requests, database_access, production_deployment - -### 1.5 Memory Search Infrastructure (OpenClaw) - -> **Why:** Agents need to search their memories efficiently. OpenClaw uses hybrid search (vector embeddings + full-text) for best results. Vector search finds semantically similar content, FTS finds exact matches. Combined scoring gives the best of both. - -#### 1.5.1 Vector Search Setup - -- [x] **1.5.1.1** Install pgvector extension — ✅ Enabled in `create_document_chunks_table` migration via `CREATE EXTENSION IF NOT EXISTS vector` - -- [x] **1.5.1.2** Create `memory_chunks` migration — ✅ Implemented as `document_chunks` table (unified for all document types). Fields: id, document_id, content, content_hash, embedding VECTOR(1536), collection, agent_id, chunk_index, metadata, search_vector (tsvector). HNSW index for cosine similarity. - -- [x] **1.5.1.3** Create `embedding_cache` migration — ✅ `embedding_cache` table with SHA256 key (provider+model+content), embedding vector column. - -#### 1.5.2 Full-Text Search Setup - -- [x] **1.5.2.1** Create PostgreSQL FTS index on memory_chunks — ✅ `search_vector` tsvector column on `document_chunks` with GIN index. Auto-populated via trigger on insert/update. - -- [x] **1.5.2.2** Create hybrid search function — ✅ Implemented as `HybridSearchService` (app/Services/Memory/HybridSearchService.php). Combines vector similarity + FTS with configurable weights (default 0.7/0.3). - -#### 1.5.3 Collection System (QMD) - -- [x] **1.5.3.1** Create `memory_collections` migration — ✅ Superseded: simpler approach using `collection` string column on `document_chunks` (values: 'general', 'memory', 'identity') + `agent_id` scoping. No separate collections table needed. 
- -- [x] **1.5.3.2** Create `memory_collection_documents` pivot migration — ✅ Superseded: collection membership determined by document location in folder hierarchy (DocumentObserver resolves collection from parent folders). - -#### 1.5.4 Result Clamping & Citation Support (QMD) - -- [x] **1.5.4.1** Add citation columns to `memory_chunks` migration — ✅ `document_chunks` has `document_id` FK and `metadata` JSON (stores title, path, dates). - -- [x] **1.5.4.2** Create `config/memory.php` configuration file — ✅ Comprehensive config with: embedding, chunking, search, reranking, context_windows, scope, compaction, memory_flush sections. - ---- - -## Phase 2: Laravel Models - -> **Why:** Eloquent models provide the ORM layer for all database operations. Models define relationships, casts, scopes, and business logic. Each model maps to a table from Phase 1. - -### 2.1 Core Models - -- [x] **2.1.1** Create `AgentConfiguration` model ← depends on: [1.4.1] — ✅ superseded by Document-based identity files + AgentDocumentService - - **What:** Primary model for agent identity - personality, instructions, and visual identity - - **Why:** Central model that all other agent-related models reference. Contains the agent's "soul" (personality) and "brain" (instructions). - - **Context:** Uses soft deletes so deleted agents can be restored. Casts ensure JSON fields are handled as arrays. - - Relationships: `belongsTo(User)`, `hasMany(AgentCapability)`, `hasOne(AgentSettings)`, `hasMany(AgentSession)`, `hasMany(AgentMemory)` - - Casts: `identity` → array, `personality` → string, `instructions` → string - -- [x] **2.1.2** Create `Capability` model ← depends on: [1.4.1] — ✅ superseded by ToolRegistry + AgentPermission model - - **What:** System-wide capability/tool definitions - - **Why:** Master list of available tools that agents can be granted. Includes tool kind for approval logic. - - **Context:** Read-only from application perspective - admin-seeded. 
Agents reference these via AgentCapability pivot. - - Relationships: `belongsToMany(AgentConfiguration)` through `agent_capabilities` - -- [x] **2.1.3** Create `AgentCapability` model (pivot with extra fields) ← depends on: [2.1.1, 2.1.2] — ✅ superseded by AgentPermission model (scope-based: tool, channel, folder, integration) - - **What:** Junction table linking agents to their enabled tools with per-agent settings - - **Why:** Each agent can have different tool permissions. One agent might have code_execution with approval required, another without. - - **Context:** The `notes` field stores agent-specific tool notes (e.g., "Use this for the staging server only"). - - Relationships: `belongsTo(AgentConfiguration)`, `belongsTo(Capability)` - -- [x] **2.1.4** Create `AgentSettings` model ← depends on: [2.1.1] — ✅ superseded by fields on User model (behavior_mode, brain, sleeping_until, etc.) - - **What:** Runtime behavior configuration for each agent - - **Why:** Controls autonomy level, cost limits, context management, and security modes. Separating from AgentConfiguration keeps identity separate from behavior. - - **Context:** Includes OpenClaw fields for reserve tokens, pruning TTL, security modes, etc. - - Relationships: `belongsTo(AgentConfiguration)` - - Casts: `reset_policy` → array, `behavior_mode` → enum - -### 2.2 Memory Models - -- [x] **2.2.1** Create `AgentSession` model ← depends on: [2.1.1] — ✅ Superseded: no formal session model. Conversations live in channels. Compaction tracked by `ConversationSummary` model (channel_id + agent_id + cumulative summary). - -- [x] **2.2.2** Create `AgentSessionMessage` model ← depends on: [2.2.1] — ✅ Superseded: messages stored in `Message` model. Loaded by `ChannelConversationLoader` which handles summary prepending and message-after-summary filtering. 
- -- [x] **2.2.3** Create `AgentMemory` model ← depends on: [2.1.1] — ✅ Superseded: LTM uses Document model (agents/{slug}/memory/YYYY-MM-DD.md) + `DocumentChunk` model for vector search. Managed by SaveMemory tool. - -- [x] **2.2.4** Create `AgentMemoryDailyLog` model ← depends on: [2.1.1] — ✅ Superseded: daily logs ARE documents. `AgentDocumentService::createMemoryLog()` creates/appends to daily log documents. - -### 2.3 Subagent Models - -- [x] **2.3.1** Create `SubagentSpawnPermission` model ← depends on: [2.1.1] — ✅ Superseded: agent permissions handled by AgentPermission model + AgentPermissionService. Inter-agent access controlled via `contact_agent` tool permissions. - -- [x] **2.3.2** Create `SubagentRun` model ← depends on: [2.3.1] — ✅ Superseded: agent task tracking uses Task + TaskStep models. Inter-agent communication via `contact_agent` tool creates traceable tasks. - -### 2.4 Extend User Model - -- [x] **2.4.1** Add relationships to User model ← depends on: [2.1.1-2.3.2] — ✅ User model has agent relationships (permissions, tasks, channels, documents, etc.) - - **What:** Connect User model to agent-related models - - **Why:** Users own agents. A user can have one agent configuration (if they are an agent user). Also tracks spawn permissions and runs. - - **Context:** The `hasOne(AgentConfiguration)` is for "agent users" - users that are actually AI agents in the system. - - `hasOne(AgentConfiguration)` - only for agent users - - `hasOne(SubagentSpawnPermission, 'parent_agent_id')` - - `hasMany(SubagentRun, 'parent_agent_id')` - - `hasMany(SubagentRun, 'child_agent_id')` - -- [x] **2.4.2** Add helper methods to User model — ✅ User model has isAgent(), agent-related scopes, permission helpers - - **What:** Convenience methods for common agent operations - - **Why:** Encapsulates agent-related logic in the model. `canSpawnAgent()` centralizes permission checking. - - **Context:** These methods are used throughout controllers and services. 
- - `isConfiguredAgent()` - checks if agent has configuration - - `getActiveSession()` - returns current session - - `canSpawnAgent($targetAgentId)` - checks spawn permission - ---- - -## Phase 3: API Controllers - -> **Why:** REST API layer that exposes agent functionality to the frontend. Each controller handles a specific domain (configuration, capabilities, settings, etc.) following Laravel resource conventions. - -### 3.1 Agent Configuration Controller - -- [x] **3.1.1** Create `AgentConfigurationController` ← depends on: [2.1.1] — ✅ superseded by AgentController with identity files API (GET/PUT /api/agents/{id}/identity/{fileType}) - - **What:** CRUD operations for agent personality, instructions, and identity - - **Why:** Frontend needs to fetch and update agent configuration. Separate PATCH endpoints allow updating individual fields without sending the entire config. - - **Context:** Personality and instructions are large text fields (markdown). Separate endpoints reduce payload size and enable autosave on specific fields. - - `GET /api/agents/{id}/configuration` - get agent config - - `PUT /api/agents/{id}/configuration` - update config - - `PATCH /api/agents/{id}/personality` - update personality only - - `PATCH /api/agents/{id}/instructions` - update instructions only - - `PATCH /api/agents/{id}/identity` - update identity only - - `PATCH /api/agents/{id}/tool-notes` - update tool notes only - -### 3.2 Agent Capabilities Controller - -- [x] **3.2.1** Create `AgentCapabilityController` ← depends on: [2.1.3] — ✅ superseded by AgentPermissionController (tool/channel/folder/integration permissions) - - **What:** Manage which tools/capabilities are enabled for an agent - - **Why:** Agents need different tools. This API enables the UI to toggle capabilities and set per-agent approval requirements. - - **Context:** Bulk update is important for "save all changes" UX. Individual PATCH allows toggling single capability without affecting others. 
- - `GET /api/agents/{id}/capabilities` - list agent capabilities - - `PUT /api/agents/{id}/capabilities` - bulk update capabilities - - `PATCH /api/agents/{id}/capabilities/{capabilityId}` - update single capability - -- [x] **3.2.2** Create `CapabilityController` ← depends on: [2.1.2] — ✅ superseded: ToolRegistry provides tool list; AgentPermissionController serves capability data - - **What:** Read-only access to system-wide capability definitions - - **Why:** Frontend needs the master list of available capabilities to render the capability assignment UI. - - **Context:** Capabilities are admin-seeded, not user-created. This is read-only. - - `GET /api/capabilities` - list all available capabilities - -### 3.3 Agent Settings Controller - -- [x] **3.3.1** Create `AgentSettingsController` ← depends on: [2.1.4] — ✅ superseded: agent settings managed via AgentController (PATCH /api/agents/{id}) + Settings tab in Agent/Show.vue - - **What:** Manage agent runtime behavior settings - - **Why:** Users need to control agent autonomy, cost limits, and reset policies. Settings affect how the agent operates, not who it is. - - **Context:** Includes OpenClaw settings (security_mode, ask_mode, reserve_tokens, etc.). Behavior mode enum: autonomous/supervised/strict. - - `GET /api/agents/{id}/settings` - get agent settings - - `PUT /api/agents/{id}/settings` - update all settings - - `PATCH /api/agents/{id}/settings/behavior-mode` - update behavior mode - - `PATCH /api/agents/{id}/settings/cost-limit` - update cost limit - - `PATCH /api/agents/{id}/settings/reset-policy` - update reset policy - -### 3.4 Agent Session Controller - -- [x] **3.4.1** Create `AgentSessionController` ← depends on: [2.2.1] — ✅ Superseded: no formal session controller. Agent conversations happen in channels. Message history accessed via ChannelController. Compaction summaries managed automatically by ConversationCompactionService. 
- -### 3.5 Agent Memory Controller - -- [x] **3.5.1** Create `AgentMemoryController` ← depends on: [2.2.3, 2.2.4] — ✅ Superseded: agent memory managed via Document API (agents/{slug}/memory/ and agents/{slug}/identity/MEMORY.md). Accessible through existing DocumentController. SaveMemory/RecallMemory tools handle agent-side memory management. - -### 3.6 Subagent Controller - -- [x] **3.6.1** Create `SubagentController` ← depends on: [2.3.1, 2.3.2] — ✅ Superseded: inter-agent communication handled by `contact_agent` tool (ask/delegate/notify patterns). Agent management via AgentController. No separate subagent controller needed. - -### 3.7 Register Routes - -- [x] **3.7.1** Add all routes to `routes/api.php` ← depends on: [3.1.1-3.6.1] — ✅ comprehensive routes for all controllers (268 lines in routes/api.php) - - **What:** Wire up all controller methods to URL routes - - **Why:** Routes connect HTTP requests to controller actions. Must be registered before frontend can call the API. - - **Context:** Group under `/api/agents` prefix. Auth middleware ensures only authenticated users access their agents. - - Group under `agents` prefix - - Apply auth middleware - - Add rate limiting where appropriate - ---- - -## Phase 3.5: Agent Execution Integration (Laravel AI SDK + Queues) - -> **Why:** This phase connects the AI layer (Laravel AI SDK) with Laravel's queue system for async task processing. Tools give agents abilities. Queue jobs and services coordinate multi-step agent tasks with durability and approval gates. - -### 3.5.1 Create Agent Tools - -- [x] **3.5.1.1** Create `app/Agents/Tools/` directory — ✅ exists with 30+ tool classes across Chat/, Docs/, Lists/, Tables/, Calendar/, Tasks/, System/, Workspace/, Charts/, Telegram/, Agents/ subdirs - - **What:** Directory for Laravel AI SDK tool definitions - - **Why:** Organizes AI tools separately from services. Each tool class implements the SDK `Tool` contract. 
- -- [x] **3.5.1.2** Create tool classes for agent capabilities — ✅ 30+ tools implemented - - **What:** Laravel AI SDK `Tool` implementations for each capability type - - **Why:** Tools are how agents interact with the system. Each tool wraps a system capability (documents, tasks, messaging, etc.) with parameter validation and execution logic. - - **Context:** Tools implement the SDK `Tool` contract with `description()`, `handle()`, and `schema()` methods. Use `php artisan make:tool` to scaffold. - - `SearchDocuments` - search workspace documents - - `ReadDocument` / `UpdateDocument` - document CRUD - - `CreateListItem` / `UpdateListItem` - list management - - `SendMessage` - messaging - - `CreateTaskStep` - task progress tracking - - `CreateApproval` - request human approval - - `QueryDataTable` - data table queries - - `WebSearch` / `WebFetch` - web capabilities (SDK built-in) - -- [x] **3.5.1.3** Create tool registry service — ✅ app/Agents/Tools/ToolRegistry.php with APP_GROUPS and getToolsForAgent() - - **What:** Service that provides tools to agents based on their DB-stored capabilities - - **Why:** Agents should only see tools they're allowed to use. The registry maps capability strings from the DB to tool class instances. - - **Context:** Called by `OpenCompanyAgent::tools()` to resolve the tool list dynamically. - ```php - class ToolRegistry { - private array $capabilityToolMap = [ - 'documents' => [SearchDocuments::class, ReadDocument::class, UpdateDocument::class], - 'lists' => [CreateListItem::class, UpdateListItem::class], - 'messaging' => [SendMessage::class], - 'tasks' => [CreateTaskStep::class], - 'approvals' => [CreateApproval::class], - 'web_search' => [WebSearch::class], - 'web_fetch' => [WebFetch::class], - ]; - - public function getToolsForAgent(User $agent): array - } - ``` - -### 3.5.2 Create Agent Jobs - -> **Why:** Jobs are the building blocks of agent task execution. Each job does one thing: fetch config, execute AI, save message, etc. 
Jobs are retryable and queued for async processing. - -- [x] **3.5.2.1** Create `app/Jobs/Agent/` directory — ✅ superseded: agent jobs live directly in app/Jobs/ (AgentRespondJob, ExecuteAgentTaskJob, etc.) - - **What:** Directory for agent-specific job classes - - **Why:** Organizes agent jobs separately from other system jobs. Each class handles one atomic operation. - -- [x] **3.5.2.2** Create `FetchAgentConfigJob` — ✅ superseded: config fetching is inline in AgentRespondJob + OpenCompanyAgent - - **What:** Load agent configuration and enabled tools from database - - **Why:** Agent tasks need agent config to operate. This job fetches who the agent is and what they can do. - - **Context:** Returns AgentConfiguration with relationships (capabilities, settings) loaded. - - Fetch agent configuration from database - - Return config with enabled tools - -- [x] **3.5.2.3** Create `ExecuteAgentJob` ← depends on: [3.5.1.2] — ✅ implemented as AgentRespondJob + ExecuteAgentTaskJob in app/Jobs/ - - **What:** Execute Laravel AI SDK agent call with tools - - **Why:** This is the core AI execution - send prompt to LLM, get response, handle tool calls. This job wraps `OpenCompanyAgent` for queued execution. - - **Context:** Uses SDK's `#[MaxSteps]` attribute for multi-turn tool use. Token tracking is critical for billing and context management. 
- - Execute agent prompt with tools - - Handle streaming responses via `->stream()->broadcastOnQueue()` - - Track token usage - ```php - class ExecuteAgentJob implements ShouldQueue { - public function handle(): AgentResult { - $config = app(DynamicProviderResolver::class)->resolveForAgent($this->agentUser); - $agent = OpenCompanyAgent::for($this->agentUser); - - return $agent->prompt( - $this->prompt, - provider: $config['provider'], - model: $config['model'], - ); - } - } - ``` - -- [x] **3.5.2.4** Create `CreateApprovalRequestJob` — ✅ superseded: approval creation handled by ApprovalWrappedTool + SendApprovalToTelegramJob - - **What:** Create an approval request record and notify users - - **Why:** When agent wants to do something risky (database access, deployment), humans must approve. This job creates the approval request. - - **Context:** Approval requests appear in the Approvals page. Users are notified via WebSocket. - - Create approval record in database - - Notify relevant users - - Return approval request ID - -- [x] **3.5.2.5** Create approval handling service — ✅ ApprovalExecutionService + WaitForApproval tool + ApprovalController - - **What:** Service that polls/waits for approval decisions - - **Why:** Agent execution must pause and wait for human decision. This service checks approval status and resumes execution when approved/rejected. - - **Context:** Can use polling or event-based approach. Rejection cancels the task. - - Check approval status - - Resume execution when approved/rejected - -- [x] **3.5.2.6** Create `ExecuteApprovedActionJob` — ✅ superseded: ApprovalExecutionService handles executing approved actions inline - - **What:** Execute the action that was approved - - **Why:** After approval, the original tool call needs to be executed. This job runs the approved action safely. - - **Context:** Logs the execution for audit trail. Updates task status to completed. 
- - Execute the approved action - - Update task status - -- [x] **3.5.2.7** Create `SaveSessionMessageJob` — ✅ Superseded: messages saved inline by AgentRespondJob (creates Message in channel). No separate job needed. - -- [x] **3.5.2.8** Create `MemoryFlushJob` ← depends on: [3.6.2.1] — ✅ Implemented as `MemoryFlushService` (app/Services/Memory/MemoryFlushService.php). Hooked into AgentRespondJob before prompt() call. Uses [FLUSH_COMPLETE] sentinel. - -- [ ] **3.5.2.9** Create `PruneSessionJob` — Not built (low priority: context management handled by ConversationCompactionService which summarizes old messages rather than pruning tool results) - -- [x] **3.5.2.10** Create `CheckMemoryFlushJob` — ✅ Integrated into MemoryFlushService::shouldFlush() — checks soft threshold (4k tokens before compaction) and flush_count per cycle. Called from AgentRespondJob. - -### 3.5.3 Create Agent Orchestration Services - -> **Why:** Services orchestrate jobs into complete agent operations. They handle the full lifecycle: load config → check context → execute AI → save results → handle approvals. - -- [x] **3.5.3.1** Create `app/Services/Agent/` directory — ✅ superseded: agent services live in app/Services/ (AgentChatService, AgentPermissionService, AgentDocumentService, etc.) - - **What:** Directory for agent orchestration services - - **Why:** Organizes agent services separately. Each service class defines a complete agent operation. - -- [x] **3.5.3.2** Create `AgentTaskService` ← depends on: [3.5.2.2-3.5.2.10] — ✅ superseded: implemented as AgentChatService + AgentRespondJob orchestration - - **What:** Main service for executing an agent task (responding to user input) - - **Why:** This is the core agent loop. It handles OpenClaw patterns (memory flush, pruning), executes the AI, saves messages, and manages approvals. - - **Context:** Uses Laravel's queue system for async execution. Jobs can be retried on failure. 
- ```php - class AgentTaskService { - public function execute(AgentTask $task): AgentResult { - // 1. Fetch agent config - $config = FetchAgentConfigJob::dispatchSync($task->agentId); - - // 2. Check if memory flush needed before execution (OpenClaw) - $flushNeeded = CheckMemoryFlushJob::dispatchSync($task->sessionId); - if ($flushNeeded) { - MemoryFlushJob::dispatchSync($task->sessionId); - } - - // 3. Prune session if TTL elapsed (OpenClaw) - PruneSessionJob::dispatchSync($task->sessionId); - - // 4. Execute agent with Laravel AI SDK - $result = ExecuteAgentJob::dispatchSync($config, $task->prompt); - - // 5. Handle silent responses (NO_REPLY convention) - if (str_starts_with($result->text, 'NO_REPLY')) { - SaveSessionMessageJob::dispatchSync($task->sessionId, $result, true); - return $result->withSuppressedOutput(); - } - - // 6. Save messages to session - SaveSessionMessageJob::dispatchSync($task->sessionId, $result); - - // 7. Handle approval if needed - if ($result->requiresApproval) { - $approval = CreateApprovalRequestJob::dispatchSync($result); - $approved = $this->waitForApproval($approval->id); - - if ($approved) { - ExecuteApprovedActionJob::dispatchSync($result); - } - } - - return $result; - } - } - ``` - -- [x] **3.5.3.3** Create `AgentSessionResetService` — ✅ Superseded: no formal sessions to reset. Agent sleep/wake managed by `sleeping_until` field + `AgentResumeFromSleepJob`. Conversation context managed by CompactConversationJob. - -- [x] **3.5.3.4** Create `SubagentSpawnService` ← depends on: [3.5.3.2] — ✅ Superseded: inter-agent communication via `contact_agent` tool creates tasks (ask/delegate/notify patterns). No separate spawn service needed. - -### 3.5.4 Queue Infrastructure - -> **Why:** Agent jobs need queue workers to process them and APIs to monitor/control them. This infrastructure makes agent execution operational. 
- -- [x] **3.5.4.1** Configure queue workers for agent jobs — ✅ queue config exists; agent jobs dispatched to queue - - **What:** Set up queue configuration for agent job processing - - **Why:** Agent jobs need dedicated queue configuration. May need separate queues for high-priority vs background tasks. - - **Context:** Configure in `config/queue.php`. Consider separate connection for agent jobs. - ```bash - php artisan queue:work --queue=agents,default - ``` - -- [x] **3.5.4.2** Add agent task status endpoints — ✅ TaskController with full lifecycle endpoints (start/pause/resume/complete/fail/cancel) - - **What:** API endpoints to check agent task status and manage execution - - **Why:** Frontend needs to display task progress (e.g., "waiting for approval", "executing"). Endpoints enable monitoring and control. - - **Context:** Status updates broadcast via WebSocket for real-time UI updates. - - `GET /api/agent-tasks/{id}` - get task status - - `POST /api/agent-tasks/{id}/cancel` - cancel running task - -- [ ] **3.5.4.3** Configure Horizon for queue monitoring (optional) - - **What:** Install Laravel Horizon for queue monitoring dashboard - - **Why:** Debugging agent jobs is easier with a visual UI. Shows job history, failures, and queue metrics. - - **Context:** Optional - can use database queries or Laravel Telescope if preferred. - ```bash - composer require laravel/horizon - ``` - ---- - -## Phase 3.6: Context Management Services (OpenClaw) - -> **Why:** These services implement OpenClaw's sophisticated context management patterns. Without them, agents would lose important context during compaction, accumulate bloated tool results, and lack nuanced approval controls. - -### 3.6.1 Context Window Guard - -- [x] **3.6.1.1** Create `ContextWindowGuard` service ← depends on: [2.1.4] — ✅ Implemented as `ModelContextRegistry` (maps 40+ models to context window sizes) + `ConversationCompactionService::needsCompaction()` (threshold checking). 
TokenEstimator handles token counting. - -### 3.6.2 Pre-Compaction Memory Flush - -- [x] **3.6.2.1** Create `MemoryFlushService` ← depends on: [3.6.1.1] — ✅ Implemented: `app/Services/Memory/MemoryFlushService.php`. `shouldFlush()` checks soft threshold (4k tokens before compaction) and `flush_count` per cycle. `flush()` runs silent agent turn with save_memory tool access. - -- [x] **3.6.2.2** Create memory flush system prompt — ✅ Built into MemoryFlushService::buildFlushPrompt(). Instructs agent to use save_memory (target: "log") for daily entries, "core" only for high-value permanent facts. Uses [FLUSH_COMPLETE] sentinel. - -### 3.6.3 Session Pruning - -- [ ] **3.6.3.1** Create `SessionPruningService` — Not built (low priority: ConversationCompactionService handles context management by summarizing old messages. Tool result pruning not yet needed.) - -### 3.6.4 Tool Kind Classification - -- [x] **3.6.4.1** Create `ToolKindClassifier` service — ✅ Superseded: ToolRegistry::TOOL_MAP has a `type` field per tool ('read', 'write', 'action', etc.). ApprovalWrappedTool uses this for approval decisions. - -### 3.6.5 Execution Approval System - -- [x] **3.6.5.1** Create `ExecutionApprovalService` ← depends on: [3.6.4.1] — ✅ Implemented as `ApprovalWrappedTool` + `ApprovalExecutionService`. Tools requiring approval are wrapped; approval requests stored in DB; `WaitForApproval` tool pauses agent execution. Behavior modes (autonomous/supervised/strict) control approval requirements. - -- [x] **3.6.5.2** Define default safe skills — ✅ Implemented via AgentPermission model. Per-agent tool permissions with enable/disable per tool group. Read-only tools generally don't require approval. - -- [ ] **3.6.5.3** Create `AgentToolAllowlist` model — Not built (low priority: per-command allowlists not needed yet. Current system uses per-tool-group permissions + behavior mode for approval decisions.) 
- ---- - -## Phase 3.7: Hybrid Memory Search (OpenClaw) - -> **Why:** Agents need to search their memories intelligently. Hybrid search combines vector embeddings (semantic similarity) with full-text search (exact matches) for best results. This enables agents to recall relevant information even when phrased differently. - -### 3.7.1 Embedding Service - -- [x] **3.7.1.1** Create `EmbeddingService` ← depends on: [0.1.3] — ✅ `app/Services/Memory/EmbeddingService.php`. Supports OpenAI + Ollama providers. embed() and embedBatch() methods. Configurable via config/memory.php. - -- [x] **3.7.1.2** Create `EmbeddingCacheService` ← depends on: [1.5.1.3] — ✅ Built into EmbeddingService. Uses `EmbeddingCache` model with SHA256 cache key (provider+model+content). Checks cache first, only calls API for uncached texts. - -### 3.7.2 Chunking Service - -- [x] **3.7.2.1** Create `ChunkingService` — ✅ `app/Services/Memory/ChunkingService.php`. Splits on paragraph breaks with configurable max_chunk_size (512 tokens) and overlap (64 tokens). Token estimation via word count * 1.3. - -### 3.7.3 Memory Indexing - -- [x] **3.7.3.1** Create `MemoryIndexService` ← depends on: [3.7.1.1, 3.7.2.1] — ✅ Implemented as `DocumentIndexingService` (app/Services/Memory/DocumentIndexingService.php). index(), deindex(), search() methods. Called by IndexDocumentJob (async) and DocumentObserver (on document save/delete). - -### 3.7.4 Hybrid Search - -- [x] **3.7.4.1** Create `HybridMemorySearch` service ← depends on: [3.7.3.1] — ✅ `app/Services/Memory/HybridSearchService.php`. Combines vector similarity (pgvector `<=>`) with FTS (`ts_rank`). Configurable weights (default 0.7/0.3). Score normalization, result clamping, and collection filtering built in. Also: `RerankingService` for cross-encoder reranking via Ollama. - -- [x] **3.7.4.2** Create `MemorySearchController` — ✅ Superseded: memory search exposed to agents via RecallMemory tool (uses HybridSearchService). 
Frontend document search via existing SearchController. No separate memory search API endpoint needed. - -### 3.7.5 Collection Management (QMD) - -- [x] **3.7.5.1** Create `MemoryCollection` model — ✅ Superseded: collection scoping uses `collection` string column on `document_chunks` ('general', 'memory', 'identity') + `agent_id` column. Resolved automatically by DocumentObserver based on document folder hierarchy. No separate collection model needed. - -- [x] **3.7.5.2** Create default collections in `AgentDocumentService` — ✅ Superseded: DocumentObserver auto-resolves collection type from folder path (agents/*/memory/ → 'memory', agents/*/identity/ → 'identity', everything else → 'general'). - -- [x] **3.7.5.3** Create `MemoryCollectionController` — ✅ Superseded: no separate collections to manage. Collection assignment is automatic via document location. - -### 3.7.6 Session Transcript Indexing (QMD) - -- [ ] **3.7.6.1** Create `ExportSessionTranscriptJob` — Not built (future enhancement: export channel conversation history as searchable documents for cross-conversation recall) - -- [ ] **3.7.6.2** Wire session transcript export to session lifecycle — Not built (depends on 3.7.6.1) - -### 3.7.7 Periodic Re-Indexing (QMD) - -- [x] **3.7.7.1** Create `PeriodicReindexJob` — ✅ Superseded: DocumentObserver triggers IndexDocumentJob on every document save/update. `memory:index-documents --fresh` command available for manual bulk reindex. No periodic scheduled job needed since observer catches all changes. - -- [x] **3.7.7.2** Create `EmbeddingRefreshJob` — ✅ Superseded: embeddings generated at index time by DocumentIndexingService. EmbeddingCache avoids redundant API calls. `memory:index-documents --fresh` available for full re-embed. No periodic refresh needed. - -- [x] **3.7.7.3** Add Document model observer for indexing triggers — ✅ `app/Observers/DocumentObserver.php`. 
Triggers IndexDocumentJob on `saved` event (non-folder docs only), deletes chunks on `deleted` event. Resolves collection and agent_id from folder hierarchy. - -### 3.7.8 Scope Rules & Security (QMD) - -- [x] **3.7.8.1** Create `MemorySearchScopeGuard` service ← depends on: [3.7.4.1] — ✅ `app/Services/Memory/MemoryScopeGuard.php`. Configurable scope modes in `config/memory.php` under `scope` key. Enforces agent-level access control on document chunks. Applies collection-based filtering via `allowedCollections()` method. - -- [x] **3.7.8.2** Add security checks to `RecallMemory` tool ← depends on: [3.7.8.1] — ✅ RecallMemory tool uses MemoryScopeGuard to enforce per-agent scope. Collection-based access ensures agents only search their own documents. HybridSearchService applies agent_id filtering on all queries. - -### 3.7.9 Enhanced HybridMemorySearch with QMD Features - -- [x] **3.7.9.1** Add result clamping to `HybridMemorySearch` ← depends on: [3.7.4.1, 1.5.4.2] — ✅ Built into `HybridSearchService`. Config in `config/memory.php` under `search` key: `max_results` (default 10), `min_similarity` threshold, `semantic_weight`/`keyword_weight` for hybrid scoring. RerankingService handles final result ordering and clamping. - -- [ ] **3.7.9.2** Add citation generation to search results — Not built (low priority). Results include chunk metadata (document title, collection) but not line-level citations. - -- [x] **3.7.9.3** Add collection filtering to `HybridMemorySearch` ← depends on: [3.7.5.1] — ✅ HybridSearchService accepts `collection` parameter. Document chunks have `collection` column for filtering. RecallMemory tool passes collection from agent context. - ---- - -## Phase 4: Frontend API Integration - -> **Why:** The frontend needs TypeScript methods to call all backend APIs. This phase creates the API client layer that Vue components will use. Centralizing API calls in useApi ensures consistent error handling and type safety. 
- -### 4.1 Extend useApi Composable - -- [x] **4.1.1** Add agent configuration methods to `useApi.ts` ← depends on: [3.1.1] — ✅ Superseded: Agent config managed via Inertia props + AgentController. Identity files managed via DocumentController API. - - **What:** TypeScript methods for fetching and updating agent configuration - - **Why:** Agent configuration (personality, instructions, identity) is the most frequently edited data. These methods connect the configuration editor components to the backend. - - **Context:** Separate update methods for each field enable autosave without sending entire config. - ```typescript - fetchAgentConfiguration(agentId: string) - updateAgentConfiguration(agentId: string, data) - updateAgentPersonality(agentId: string, content: string) - updateAgentInstructions(agentId: string, content: string) - updateAgentIdentity(agentId: string, identity) - updateAgentToolNotes(agentId: string, notes: string) - ``` - -- [x] **4.1.2** Add agent capabilities methods ← depends on: [3.2.1] — ✅ Superseded: AgentPermissionController provides REST API. AgentCapabilities.vue uses direct axios calls. - - **What:** Methods for managing agent tool/capability assignments - - **Why:** Capabilities UI needs to fetch available capabilities and update agent's enabled tools. Bulk update enables "save all changes" pattern. - - **Context:** `fetchAllCapabilities()` gets the system-wide list. Agent-specific capabilities have per-agent settings (enabled, requires_approval). - ```typescript - fetchAgentCapabilities(agentId: string) - updateAgentCapabilities(agentId: string, capabilities) - fetchAllCapabilities() - ``` - -- [x] **4.1.3** Add agent settings methods ← depends on: [3.3.1] — ✅ Superseded: AgentSettingsPanel.vue uses Inertia forms + direct API calls to AgentController. 
- - **What:** Methods for managing agent runtime settings - - **Why:** Settings panel needs to fetch and update behavior mode, cost limits, reset policies, and OpenClaw settings (security mode, ask mode, etc.). - - **Context:** Individual update methods allow saving specific settings without full form submission. - ```typescript - fetchAgentSettings(agentId: string) - updateAgentSettings(agentId: string, settings) - updateAgentBehaviorMode(agentId: string, mode) - updateAgentCostLimit(agentId: string, limit: number) - updateAgentResetPolicy(agentId: string, policy) - ``` - -- [x] **4.1.4** Add agent session methods ← depends on: [3.4.1] — ✅ Superseded: Channel-based conversations replace sessions. Chat UI uses MessageController API with Inertia. - - **What:** Methods for managing conversation sessions and messages - - **Why:** Session UI needs to list past sessions, view messages, and create new sessions (context reset). This is central to the chat/memory experience. - - **Context:** Pagination is important for sessions with many messages. `createNewSession` archives current and starts fresh. - ```typescript - fetchAgentSessions(agentId: string, page?: number) - fetchCurrentSession(agentId: string) - createNewSession(agentId: string) - fetchSessionMessages(sessionId: string, page?: number) - archiveSession(sessionId: string) - ``` - -- [x] **4.1.5** Add agent memory methods ← depends on: [3.5.1] — ✅ Superseded: Memory managed via identity file editor (MEMORY.md) + agent tools (SaveMemory/RecallMemory). - - **What:** Methods for managing persistent memories and daily logs - - **Why:** Memory view needs to display, add, and delete memories. Reset is a destructive action that clears all agent knowledge. - - **Context:** Daily logs are read-only from frontend perspective. They're written by the agent during operation. 
- ```typescript - fetchAgentMemories(agentId: string) - addAgentMemory(agentId: string, entry) - deleteAgentMemory(agentId: string, memoryId: string) - resetAgentMemory(agentId: string) - fetchAgentDailyLogs(agentId: string) - ``` - -- [ ] **4.1.6** Add subagent methods — Not built. Subagent UI not yet implemented (inter-agent communication works via contact_agent tool backend-only). - - **What:** Methods for managing subagent spawning - - **Why:** Subagent UI needs to configure spawn permissions, trigger spawns, monitor runs, and cancel if needed. - - **Context:** Spawn is async - it starts a background task and returns immediately. Frontend polls or uses WebSocket to track progress. - ```typescript - fetchSpawnPermissions(agentId: string) - updateSpawnPermissions(agentId: string, permissions) - spawnSubagent(agentId: string, task) - fetchSubagentRuns(agentId: string) - cancelSubagentRun(runId: string) - ``` - -- [ ] **4.1.7** Add memory search methods (OpenClaw) — Not built. Memory search available to agents via RecallMemory tool but no frontend search UI exists yet. - - **What:** Method for semantic memory search - - **Why:** MemorySearchInput component needs to search agent memories. Returns ranked results with source references. - - **Context:** Uses hybrid search (vector + FTS) on backend. - ```typescript - searchAgentMemory(agentId: string, query: string, limit?: number) - ``` - -- [ ] **4.1.8** Add execution approval methods (OpenClaw) — Not built. Approval works via ApprovalWrappedTool + AgentPermission but no allowlist pattern management UI. - - **What:** Methods for managing command allowlist - - **Why:** AllowlistManager component needs to display, add, and remove allowlist patterns. Shows usage stats for each pattern. - - **Context:** Patterns can be exact commands or globs. Adding a pattern auto-approves matching commands. 
- ```typescript - fetchAgentAllowlist(agentId: string) - addAllowlistPattern(agentId: string, pattern: string) - removeAllowlistPattern(agentId: string, patternId: string) - ``` - -### 4.2 Update Frontend Components (Already Created) - -> **Why:** Components exist with mock data. This phase connects them to real APIs, making the UI functional. - -- [x] **4.2.1** Connect `AgentPersonalityEditor.vue` to API ← depends on: [4.1.1] — ✅ superseded by AgentIdentityFiles.vue two-panel editor for all 8 identity files - - **What:** Wire personality editor to backend - - **Why:** Users need to edit and save agent personality. Currently uses mock data. - - **Context:** Should show loading state while saving, success toast on save, error handling for failures. - - Replace mock save with `updateAgentPersonality()` - - Add error handling and success feedback - -- [x] **4.2.2** Connect `AgentInstructionsEditor.vue` to API ← depends on: [4.1.1] — ✅ superseded by AgentIdentityFiles.vue two-panel editor - - **What:** Wire instructions editor to backend - - **Why:** Users need to edit and save agent instructions. Currently uses mock data. - - **Context:** Same UX patterns as personality editor - loading, success, error states. - - Replace mock save with `updateAgentInstructions()` - - Add error handling and success feedback - -- [x] **4.2.3** Connect `AgentCapabilities.vue` to API ← depends on: [4.1.2] — ✅ AgentCapabilities.vue with real tool toggles via AgentPermissionController - - **What:** Wire capabilities toggles to backend - - **Why:** Users need to enable/disable tools and set approval requirements. Currently uses mock data. - - **Context:** Should fetch system capabilities list and agent's current assignments. Save should bulk update. 
- - Fetch real capabilities list - - Save capability changes and notes - -- [x] **4.2.4** Connect `AgentMemoryView.vue` to API ← depends on: [4.1.4, 4.1.5] — ✅ superseded: MEMORY.md managed via identity file editor - - **What:** Wire memory and session display to backend - - **Why:** Users need to view sessions, messages, and memories. Also need to add memories and start new sessions. - - **Context:** Session list should be paginated. Memory add/delete should update list in real-time. - - Fetch real session data - - Fetch real memory entries - - Implement new session creation - - Implement memory add/delete - -- [x] **4.2.5** Connect `AgentSettingsPanel.vue` to API ← depends on: [4.1.3] — ✅ AgentSettingsPanel.vue connected with real behavior mode, brain selector, delete - - **What:** Wire settings form to backend - - **Why:** Users need to configure agent behavior, cost limits, and reset policies. - - **Context:** Some actions (reset, delete) need confirmation dialogs. Pause/resume should update status badge. - - Fetch real settings - - Save settings changes - - Implement reset/pause/delete actions - -- [x] **4.2.6** Connect `AgentIdentityCard.vue` to real data ← depends on: [4.1.1] — ✅ agent identity data fetched from real API - - **What:** Wire identity display to backend - - **Why:** Agent card should show real name, emoji, type, and stats (sessions, messages, cost). - - **Context:** Stats may need separate endpoint or be included in config response. - - Ensure identity is fetched from API - - Display real stats - -- [ ] **4.2.7** Create `AllowlistManager.vue` component (OpenClaw) ← depends on: [4.1.8] - - **What:** New component for managing command allowlist patterns - - **Why:** Users need to pre-approve commands to reduce approval prompts. Should show which patterns are used and when. - - **Context:** Pattern input should support glob syntax hints. Usage stats help users clean up stale patterns. 
- - List allowlist patterns with usage stats - - Add/remove patterns - - Show last used command for each pattern - -- [ ] **4.2.8** Update `AgentSettingsPanel.vue` with OpenClaw settings ← depends on: [4.1.3] - - **What:** Add new settings fields for OpenClaw features - - **Why:** Users need to configure security mode, ask mode, context reserves, and pruning TTL. - - **Context:** Use dropdowns for enums (security_mode, ask_mode). Number inputs for token counts. Toggle for auto_allow_skills. - - Security mode selector (deny/allowlist/full) - - Ask mode selector (off/on-miss/always) - - Reserve tokens configuration - - Pruning TTL configuration - - Auto-allow skills toggle - -- [ ] **4.2.9** Create `MemorySearchInput.vue` component (OpenClaw) ← depends on: [4.1.7] - - **What:** New component for semantic memory search - - **Why:** Users and agents need to search memories by meaning, not just keywords. Enables finding relevant context quickly. - - **Context:** Search input with debounced API calls. Results show matched chunk with source reference (click to view full entry). - - Search input with results display - - Show matched chunks with source references - - Link to full memory entries - -### 4.3 Update Agent/Show.vue Page - -> **Why:** The main agent page needs to coordinate all components with real data. Replace mock `fetchData()` with actual API calls. - -- [x] **4.3.1** Replace mock `fetchData()` with real API calls ← depends on: [4.2.1-4.2.6] — ✅ Agent/Show.vue fetches real data from API (not mocks) - - **What:** Load all agent data from API on page mount - - **Why:** Page currently shows mock data. Need to fetch real configuration, capabilities, settings, session, and memories. - - **Context:** Consider parallel fetching for better performance. Handle loading and error states for each section. 
- - Fetch agent configuration - - Fetch capabilities - - Fetch settings - - Fetch current session - - Fetch memories - -- [x] **4.3.2** Implement all event handlers with real API calls — ✅ Agent/Show.vue uses real API for all operations - - **What:** Wire all component events to API methods - - **Why:** User actions (save, delete, etc.) must persist to backend. Currently many handlers just log or show toasts. - - **Context:** Destructive actions (reset, delete) need confirmation dialogs. Success/error feedback via toasts. - - `savePersonality()` → API call - - `saveInstructions()` → API call - - `saveCapabilityNotes()` → API call - - `startNewSession()` → API call - - `addMemoryEntry()` → API call - - `deleteMemoryEntry()` → API call - - `updateSettings()` → API call - - `resetAgentMemory()` → API call with confirmation - - `togglePause()` → API call - - `deleteAgent()` → API call with confirmation - ---- - -## Phase 5: Agent Control Actions - -> **Why:** Agents need operational controls beyond configuration. Users must be able to pause, resume, stop, and delete agents. These are critical safety controls. - -### 5.1 Agent Status Management - -- [x] **5.1.1** Add status control endpoints to `UserController` ← depends on: [2.4.1] — ✅ AgentController handles status (PATCH /api/agents/{id}) - - **What:** API endpoints for controlling agent operational status - - **Why:** Users need to pause agents (stop processing), resume them, or hard-stop current work. Essential for managing runaway or misbehaving agents. - - **Context:** Pause prevents new tasks from starting. Stop cancels the currently running task. 
- - `POST /api/agents/{id}/pause` - pause agent - - `POST /api/agents/{id}/resume` - resume agent - - `POST /api/agents/{id}/stop` - stop agent (cancel current task) - -- [x] **5.1.2** Implement pause/resume logic — ✅ agent status management (idle/working/sleeping) with AgentStatusUpdated broadcast event - - **What:** Business logic for status transitions and task cancellation - - **Why:** Status changes must update the database and notify connected clients. Stopping requires cancelling the active task. - - **Context:** WebSocket broadcast ensures all open tabs see status change immediately. - - Update agent status to 'paused'/'working'/'idle' - - Cancel any running tasks if stopping - - Broadcast status change via WebSocket - -### 5.2 Agent Deletion - -- [x] **5.2.1** Add agent deletion endpoint ← depends on: [2.4.1] — ✅ DELETE /api/agents/{id} exists in routes + AgentController - - **What:** Soft-delete endpoint for removing an agent - - **Why:** Users need to delete agents they no longer need. Soft delete allows recovery if deletion was accidental. - - **Context:** Must clean up related data: archive sessions, clear/archive memories, remove from any channels. - - `DELETE /api/agents/{id}` - soft delete agent - - Archive all sessions - - Clear memories (or archive) - - Remove from channels - -- [x] **5.2.2** Add confirmation dialog in frontend — ✅ AgentSettingsPanel.vue has delete confirmation via shared ConfirmationDialog component. Shows warning before deletion. - ---- - -## Phase 6: Database Seeding - -> **Why:** Seeders provide initial data for development and testing. Capabilities must be seeded before agents can be configured. Agent seeders create demo agents for testing the system. - -### 6.1 Create Seeders - -- [x] **6.1.1** Create `CapabilitySeeder` ← depends on: [1.4.1] — ✅ Superseded: Capabilities are defined in ToolRegistry::TOOL_MAP as a static registry. No database seeder needed — tools are code-defined, not DB-seeded. 
- - **What:** Seed the capabilities table with default tools - - **Why:** Capabilities are system-defined, not user-created. This seeder creates the tools that agents can be assigned. - - **Context:** Each capability has default enabled/approval settings. Tool kind (from OpenClaw) should also be set. - - Seed 6 default capabilities: - - Code execution (enabled, no approval, kind: execute) - - File operations (enabled, no approval, kind: edit) - - Git operations (enabled, no approval, kind: execute) - - API requests (enabled, no approval, kind: fetch) - - Database access (enabled, requires approval, kind: execute) - - Production deployment (disabled, requires approval, kind: execute) - -- [x] **6.1.2** Create `AgentConfigurationSeeder` ← depends on: [6.1.1] — ✅ Superseded: Agent config lives on User model fields (is_agent, agent_type, emoji, etc.) + identity documents. UserSeeder creates agents with config. No separate config seeder needed. - - **What:** Create agent configurations for demo/test agents - - **Why:** Developers need agents to test with. Creates pre-configured agents with meaningful personalities and instructions. - - **Context:** Existing seeded agents (Atlas, Echo, Nova, etc.) need configurations. Each agent type should have appropriate capabilities. - - Create configurations for existing seeded agents (Atlas, Echo, Nova, Pixel, Logic, Scout) - - Set default personality and instructions for each type - - Assign appropriate capabilities - -- [x] **6.1.3** Create `AgentSettingsSeeder` ← depends on: [6.1.2] — ✅ Superseded: Agent settings stored in AppSetting model with defaults in config. No separate settings seeder needed. - - **What:** Create default settings for each agent - - **Why:** Agents need settings to operate. This seeder creates sensible defaults for development. - - **Context:** Supervised mode is safest for development. Include OpenClaw settings with reasonable defaults. 
- - Create default settings for each agent - - Behavior mode: supervised - - Cost limit: 100 - - Reset policy: daily at 4am - - Security mode: allowlist (OpenClaw default) - - Ask mode: on-miss (OpenClaw default) - -### 6.2 Run Seeders - -- [x] **6.2.1** Update `DatabaseSeeder.php` to include new seeders — ✅ Superseded: DatabaseSeeder + UserSeeder handle agent creation. No separate capability/config/settings seeders needed. - - **What:** Register new seeders in the main seeder - - **Why:** Running `php artisan db:seed` should execute all seeders in correct order. - - **Context:** Order matters: Capabilities → AgentConfiguration → AgentSettings (due to foreign keys). - -- [x] **6.2.2** Run `php artisan db:seed` — ✅ Superseded: `php artisan db:seed` works with existing seeders. - - **What:** Execute all seeders to populate database - - **Why:** Creates development data needed to test the system. - - **Context:** Can use `--class` to run specific seeders. Fresh install should run all. - ---- - -## Phase 7: Testing - -> **Why:** Tests ensure the system works correctly and catches regressions. Backend tests verify API contracts and business logic. Frontend tests verify user interactions work as expected. - -### 7.1 Backend Tests - -- [x] **7.1.1** Create `AgentConfigurationTest` feature test ← depends on: [3.1.1] — ✅ AgentControllerTest exists in tests/Feature/ - - **What:** Test agent configuration API endpoints - - **Why:** Configuration is core functionality. Tests ensure CRUD works, authorization prevents unauthorized access, and validation rejects bad data. - - **Context:** Use Laravel's testing helpers. Test as authenticated user and verify cannot access other users' agents. 
- - Test CRUD operations - - Test authorization (only owners can edit) - - Test validation - -- [x] **7.1.2** Create `AgentCapabilityTest` feature test ← depends on: [3.2.1] — ✅ AgentPermissionControllerTest + AgentPermissionServiceTest + ToolRegistryTest exist - - **What:** Test capability management API - - **Why:** Capabilities control what tools agents can use. Tests ensure assignment works and bulk updates don't break relationships. - - **Context:** Test both individual capability toggle and bulk update. Verify pivot table data (enabled, requires_approval) persists correctly. - - Test capability assignment - - Test bulk updates - -- [ ] **7.1.3** Create `AgentSettingsTest` feature test — Not built yet. Settings managed via AppSetting model + SettingController. - - **What:** Test settings API endpoints - - **Why:** Settings control agent behavior. Tests ensure all settings save correctly and enum validation rejects invalid values. - - **Context:** Include tests for OpenClaw settings (security_mode, ask_mode). Verify JSON fields (reset_policy) serialize/deserialize correctly. - - Test settings updates - - Test enum validation - -- [x] **7.1.4** Create `AgentSessionTest` feature test — ✅ Superseded: ChannelConversationLoaderTest covers conversation loading, compaction triggers, and message retrieval. - - **What:** Test session management API - - **Why:** Sessions are the conversation context. Tests ensure creation, archival, and message retrieval work correctly. - - **Context:** Test pagination for message retrieval. Verify new session creation archives the old one. - - Test session creation - - Test session archival - - Test message retrieval - -- [ ] **7.1.5** Create `AgentMemoryTest` feature test — Not built. Memory services (ChunkingService, EmbeddingService, HybridSearchService) lack dedicated test coverage. - - **What:** Test memory management API - - **Why:** Memories are persistent agent knowledge. Tests ensure CRUD and reset work correctly. 
- - **Context:** Reset is destructive - test that it clears all memories. Test category enum validation. - - Test memory CRUD - - Test memory reset - -### 7.2 Frontend Tests - -> **Why:** Frontend tests ensure the UI works correctly. Component tests verify individual components, integration tests verify they work together. - -- [ ] **7.2.1** Test Agent/Show.vue renders all tabs - - **What:** Verify the main agent page renders all 7 tabs correctly - - **Why:** Page structure is foundational. If tabs don't render, nothing else works. - - **Context:** Should test tab switching works and correct content appears for each tab. - -- [ ] **7.2.2** Test personality editor save/preview - - **What:** Test markdown editing and preview functionality - - **Why:** Personality editor is a primary user interaction. Save must work, preview must render markdown. - - **Context:** Test markdown rendering, save button calls API, success/error feedback appears. - -- [ ] **7.2.3** Test instructions editor save/preview - - **What:** Test instructions editing functionality - - **Why:** Same importance as personality editor. Instructions define agent behavior. - - **Context:** Same test patterns as personality editor. - -- [ ] **7.2.4** Test capabilities toggle - - **What:** Test capability enable/disable and approval toggle - - **Why:** Capability toggles control tool access. Must work reliably. - - **Context:** Test toggle state changes, save persists changes, list refreshes correctly. - -- [ ] **7.2.5** Test memory add/delete - - **What:** Test adding and deleting memory entries - - **Why:** Memory management is important for agent knowledge. Add/delete must work correctly. - - **Context:** Test form submission, new entry appears in list, delete removes entry. - -- [ ] **7.2.6** Test settings changes - - **What:** Test all settings form fields and save - - **Why:** Settings affect agent behavior. All fields must persist correctly. 
- - **Context:** Test each setting type: enums, numbers, JSON (reset_policy), toggles. - -- [ ] **7.2.7** Test dark mode on all components - - **What:** Verify all components display correctly in dark mode - - **Why:** Dark mode is expected feature. Broken dark mode is poor UX. - - **Context:** Use Tailwind's dark: prefix. Verify text contrast, background colors, borders. - ---- - -## Phase 8: Future Enhancements (Post-MVP) - -> **Why:** These features are valuable but not required for MVP. They enhance the system with advanced capabilities like auto-compaction, subagent spawning, skills, and webhooks. - -### 8.1 Vector Memory Search -- [x] **8.1.1** ~~Install pgvector extension~~ → Moved to Phase 1.5.1.1 -- [x] **8.1.2** ~~Create `memory_embeddings` table~~ → Moved to Phase 1.5.1.2 -- [x] **8.1.3** ~~Implement embedding generation service~~ → Moved to Phase 3.7.1.1 -- [x] **8.1.4** ~~Create semantic search endpoint~~ → Moved to Phase 3.7.4.2 -- [x] **8.1.5** ~~Add search UI to memory view~~ → Moved to Phase 4.2.9 - -### 8.2 Context Management - -- [x] **8.2.1** ~~Implement context pruning service~~ → Moved to Phase 3.6.3.1 - -- [x] **8.2.2** Implement auto-compaction — ✅ `app/Services/Memory/ConversationCompactionService.php`. Triggered by ChannelConversationLoader when token count exceeds threshold (configurable in `config/memory.php` under `compaction`). Summarizes older messages via LLM, stores ConversationSummary model, preserves recent messages. CompactConversationJob for async execution. - -- [x] **8.2.3** ~~Add pre-compaction memory flush~~ → Moved to Phase 3.6.2.1 - -- [ ] **8.2.4** Add compaction history view - - **What:** UI to view past compaction events and their summaries - - **Why:** Users may want to see what was compacted and when. Helps understand what context was lost. - - **Context:** Store compaction summaries in session_messages with type 'compaction'. Display in a timeline view. 
- -### 8.3 Subagent Spawning UI - -> **Why:** Backend supports subagents (Phase 3.5.3.4) but needs frontend UI. These components let users spawn agents and monitor their work. - -- [ ] **8.3.1** Create spawn dialog component - - **What:** Modal dialog for spawning a subagent - - **Why:** Users need to select which agent to spawn and provide a task description. - - **Context:** Should show available agents (based on spawn permissions) and task input field. - -- [ ] **8.3.2** Add spawn button to agent page - - **What:** Button in agent page to trigger spawn dialog - - **Why:** Entry point for spawning subagents from the current agent. - - **Context:** Button should be disabled if agent has no spawn permissions. - -- [ ] **8.3.3** Show running subagents list - - **What:** Component showing currently running subagent tasks - - **Why:** Users need to monitor spawned agents - see progress, status, and cancel if needed. - - **Context:** Real-time updates via WebSocket. Show status badge (running, success, error). - -- [ ] **8.3.4** Add subagent result announcement - - **What:** Notification when a subagent completes its task - - **Why:** Users need to know when spawned work is done. Announce results in the parent agent's chat. - - **Context:** WebSocket notification triggers toast and chat announcement. - -### 8.4 Skills System - -> **Why:** Skills are reusable, composable agent capabilities. OpenClaw has a sophisticated skill system with tiering. This enables "slash commands" and skill-based agent composition. - -- [ ] **8.4.1** Create skills database tables - - **What:** Tables for skill definitions, versions, and agent-skill assignments - - **Why:** Skills need persistent storage. Versioning enables skill updates without breaking existing agents. - - **Context:** OpenClaw has skill tiers: bundled (system), managed (installed), workspace (custom). 
- -- [ ] **8.4.2** Create skills management UI - - **What:** UI for browsing, installing, and managing skills - - **Why:** Users need to discover available skills and assign them to agents. - - **Context:** Similar to VS Code extension marketplace but for agent skills. - -- [ ] **8.4.3** Implement skill tiering (workspace > managed > bundled) - - **What:** Priority system for skill resolution - - **Why:** Users may want to override bundled skills with custom versions. Tiering ensures custom skills take precedence. - - **Context:** OpenClaw resolution: workspace (highest) → managed → bundled (lowest). - -- [ ] **8.4.4** Add skill invocation tracking - - **What:** Track which skills are used and how often - - **Why:** Usage analytics help users understand agent behavior. Useful for optimization and debugging. - - **Context:** Store invocation counts, last used timestamp, average execution time. - -### 8.5 Webhooks & External Integrations - -> **Why:** Agents need to be triggered by external events (GitHub commits, Slack messages, etc.). Webhooks enable event-driven agent activation. - -- [x] **8.5.1** Already implemented basic UI (Integrations.vue) — ✅ Integrations.vue exists with Telegram and Plausible configured - - **What:** UI skeleton for integrations exists - - **Why:** Placeholder for webhook management interface. - -- [ ] **8.5.2** Create webhooks database table - - **What:** Table for webhook endpoint definitions - - **Why:** Store webhook URLs, secrets, target agents, and event filters. - - **Context:** Each webhook has a unique URL, secret for verification, and maps to an agent + action. - -- [ ] **8.5.3** Implement webhook processing logic - - **What:** Controller and service for receiving and processing webhooks - - **Why:** Incoming webhooks need to be verified (signature), parsed, and routed to the appropriate agent. - - **Context:** Support common webhook formats (GitHub, Slack, generic). Queue for async processing. 
- -- [ ] **8.5.4** Add webhook testing UI - - **What:** UI for testing webhook endpoints - - **Why:** Users need to verify webhooks work before deploying. Test sends a sample payload and shows result. - - **Context:** Similar to Stripe's webhook testing. Show recent webhook deliveries and their status. - ---- - -## Phase 3.8: Plugin System - -> **Why:** OpenClaw's plugin architecture enables extensibility without modifying core code. Plugins add tools, channels, providers, skills, and more. OpenCompany should support the same extensibility via Laravel packages. - -### 3.8.1 Plugin Infrastructure - -- [ ] **3.8.1.1** Create `plugins` migration - - **What:** Table to track installed plugins and their configuration - - **Why:** Need to know which plugins are installed, enabled, and how they're configured. - - Fields: `id`, `name`, `version`, `description`, `author`, `enabled`, `capabilities` (JSON), `config` (JSON), `slot` (nullable enum: memory/sandbox/browser), `created_at`, `updated_at` - -- [ ] **3.8.1.2** Create `Plugin` model - - **What:** Eloquent model for plugin management - - **Why:** Central model for plugin CRUD and capability resolution. - - Relationships: `hasMany(PluginCapability)` - - Scopes: `enabled()`, `withCapability($type)`, `forSlot($slot)` - -- [ ] **3.8.1.3** Create `PluginRegistryService` - - **What:** Service that discovers, validates, and registers plugins - - **Why:** Centralized plugin lifecycle management. Handles discovery chain: config → workspace → global → bundled. - - **Context:** Plugins are Laravel packages with service providers. The registry tracks which capabilities each plugin provides. 
- - `discover()` - scan for available plugins - - `register(Plugin $plugin)` - register plugin capabilities - - `validateConfig(Plugin $plugin)` - validate plugin config against schema - - `resolveSlot(string $slot)` - get the active plugin for an exclusive slot - -### 3.8.2 Plugin Capabilities - -- [ ] **3.8.2.1** Create capability interfaces - - **What:** PHP interfaces for each plugin capability type - - **Why:** Type-safe contracts that plugins must implement. Ensures consistency across all plugins. - - **Context:** OpenClaw supports 10 capability types. Start with the most useful ones. - ```php - interface ProvidesTools { public function tools(): array; } - interface ProvidesChannels { public function channels(): array; } - interface ProvidesProviders { public function providers(): array; } - interface ProvidesSkills { public function skills(): array; } - interface ProvidesHooks { public function hooks(): array; } - ``` - -- [ ] **3.8.2.2** Create exclusive slot system - - **What:** Logic to enforce that only one plugin can claim each exclusive slot - - **Why:** Some capabilities (memory backend, sandbox) can only have one active implementation. - - **Context:** If multiple plugins claim the same slot, highest-precedence one wins. - -### 3.8.3 Plugin Management API - -- [ ] **3.8.3.1** Create `PluginController` - - **What:** API endpoints for managing plugins - - **Why:** Frontend needs to list, enable/disable, and configure plugins. - - `GET /api/plugins` - list all plugins - - `POST /api/plugins/{id}/enable` - enable plugin - - `POST /api/plugins/{id}/disable` - disable plugin - - `PUT /api/plugins/{id}/config` - update plugin config - - `POST /api/plugins/discover` - trigger plugin discovery - -- [ ] **3.8.3.2** Create plugin management UI - - **What:** Vue component for plugin management - - **Why:** Users need to see installed plugins, toggle them, and configure settings. - - **Context:** Similar to VS Code extension panel. 
Show capabilities, slot claims, config fields. - ---- - -## Phase 3.9: Multi-Device Support - -> **Why:** OpenClaw's gateway enables agents to be accessed from any device (iOS, Android, macOS, web). A node registry tracks connected devices and routes tasks based on device capabilities. OpenCompany should support similar multi-device access. - -### 3.9.1 Node Registry - -- [ ] **3.9.1.1** Create `connected_devices` migration - - **What:** Table to track connected devices/clients - - **Why:** Need to know which devices are connected, their capabilities, and health status. - - Fields: `id`, `user_id` (FK), `device_id` (unique string), `platform` (enum: ios/android/macos/web/desktop), `device_name`, `capabilities` (JSON), `last_heartbeat_at`, `is_online`, `metadata` (JSON), `created_at`, `updated_at` - -- [ ] **3.9.1.2** Create `ConnectedDevice` model - - **What:** Eloquent model for device management - - **Why:** Track device state and enable capability-based routing. - - Relationships: `belongsTo(User)` - - Scopes: `online()`, `withCapability($cap)`, `forPlatform($platform)` - - Methods: `heartbeat()`, `markOffline()`, `hasCapability($cap)` - -- [ ] **3.9.1.3** Create WebSocket heartbeat system - - **What:** Periodic heartbeat via Reverb to track device health - - **Why:** Need to detect disconnected devices. Devices send heartbeat every 30 seconds. - - **Context:** Uses existing Laravel Reverb WebSocket. Add presence channel for device tracking. - ```php - // routes/channels.php - Broadcast::channel('devices.{userId}', function ($user, $userId) { - return $user->id === $userId ? [ - 'id' => $user->id, - 'name' => $user->name, - 'device' => request()->header('X-Device-Id'), - ] : null; - }); - ``` - -### 3.9.2 Device-Aware Routing - -- [ ] **3.9.2.1** Create `DeviceRouter` service - - **What:** Service that routes notifications and tasks to the right device - - **Why:** Some tasks need specific device capabilities (e.g., browser tasks → desktop device). 
- - `routeNotification($user, $notification)` - route to best device - - `routeTask($user, $task)` - route to device with required capabilities - - `broadcastToAll($user, $event)` - broadcast to all connected devices - -- [ ] **3.9.2.2** Create device status dashboard component - - **What:** Vue component showing connected devices and their status - - **Why:** Users need to see which devices are connected, online, and their capabilities. - - Real-time status via WebSocket - - Show platform icon, device name, last activity, capabilities - -### 3.9.3 Cross-Platform Sync - -- [ ] **3.9.3.1** Create sync event system - - **What:** Broadcast state changes to all connected devices - - **Why:** Agent state (messages, tasks, approvals) must be consistent across devices. - - **Context:** Use existing Reverb channels. Add sync events for: new messages, task updates, approval requests, agent status changes. - ---- - -## Phase 3.10: Cron & Scheduled Tasks - -> **Why:** OpenClaw supports cron-based autonomous agent execution. Agents can perform tasks on a schedule without human triggers — daily summaries, periodic monitoring, scheduled reports. OpenCompany should support the same autonomous agent capabilities. - -### 3.10.1 Cron Job Infrastructure - -- [ ] **3.10.1.1** Create `agent_cron_jobs` migration - - **What:** Table for scheduled agent tasks - - **Why:** Store cron job definitions with schedule, task prompt, and delivery configuration. - - Fields: `id`, `agent_id` (FK to users), `name`, `schedule` (cron expression), `task` (TEXT - prompt), `delivery_mode` (enum: announce/none/post), `target_channel_id` (nullable FK), `enabled`, `one_shot`, `last_run_at`, `last_result` (JSON), `created_at`, `updated_at` - -- [ ] **3.10.1.2** Create `AgentCronJob` model - - **What:** Eloquent model for cron job management - - **Why:** Central model for cron CRUD and execution tracking. 
- - Relationships: `belongsTo(User, 'agent_id')`, `belongsTo(Channel, 'target_channel_id')` - - Scopes: `enabled()`, `forAgent($agentId)`, `dueNow()` - - Methods: `isDue()`, `markRan()`, `shouldAutoDelete()` - -- [ ] **3.10.1.3** Create `ExecuteAgentCronJob` queue job - - **What:** Job that executes a scheduled agent task - - **Why:** Cron jobs should run asynchronously on queue workers, with isolated sessions. - - **Context:** Creates an isolated session (separate from conversation context) so cron execution doesn't pollute chat history. - ```php - class ExecuteAgentCronJob implements ShouldQueue - { - public function handle(): void - { - // Create isolated session for cron execution - $session = AgentSession::create([ - 'session_key' => "cron:{$this->cronJob->id}:" . now()->timestamp, - 'status' => 'active', - ]); - - $agent = OpenCompanyAgent::for($this->cronJob->agent); - $response = $agent->prompt($this->cronJob->task); - - // Deliver based on mode - match ($this->cronJob->delivery_mode) { - 'announce' => $this->announceResult($response), - 'post' => $this->postToChannel($response), - 'none' => null, - }; - - // Auto-delete one-shot jobs - if ($this->cronJob->one_shot) { - $this->cronJob->delete(); - } - - $this->cronJob->update([ - 'last_run_at' => now(), - 'last_result' => ['response' => (string) $response], - ]); - } - } - ``` - -### 3.10.2 Scheduler Integration - -- [ ] **3.10.2.1** Register cron jobs with Laravel scheduler - - **What:** Load agent cron jobs from DB and register with `Schedule` - - **Why:** Laravel's scheduler handles cron expression evaluation, overlap prevention, and single-server execution. 
- ```php - // app/Console/Kernel.php - protected function schedule(Schedule $schedule): void - { - AgentCronJob::where('enabled', true)->each(function ($job) use ($schedule) { - $schedule->job(new ExecuteAgentCronJob($job)) - ->cron($job->schedule) - ->withoutOverlapping() - ->onOneServer(); - }); - } - ``` - -- [ ] **3.10.2.2** Create cron job execution history migration - - **What:** Table to track cron job execution history - - **Why:** Need audit trail for scheduled executions. Track success/failure, runtime, token usage. - - Fields: `id`, `cron_job_id` (FK), `status` (enum: success/error/timeout), `started_at`, `completed_at`, `token_count`, `result` (JSON), `error` (TEXT nullable) - -### 3.10.3 Cron Management API & UI - -- [ ] **3.10.3.1** Create `AgentCronJobController` - - **What:** API endpoints for managing agent cron jobs - - **Why:** Frontend needs CRUD for cron jobs plus manual trigger and history view. - - `GET /api/agents/{id}/cron-jobs` - list cron jobs - - `POST /api/agents/{id}/cron-jobs` - create cron job - - `PUT /api/cron-jobs/{id}` - update cron job - - `DELETE /api/cron-jobs/{id}` - delete cron job - - `POST /api/cron-jobs/{id}/trigger` - manual trigger - - `GET /api/cron-jobs/{id}/history` - execution history - -- [ ] **3.10.3.2** Create cron management Vue component - - **What:** UI for managing scheduled agent tasks - - **Why:** Users need to create, edit, enable/disable, and monitor cron jobs. - - **Context:** Include cron expression helper (common presets: daily, hourly, weekly, etc.), delivery mode selector, and execution history log. - ---- - -### 3.11 Heartbeat System - -- [ ] **3.11.1** Add heartbeat fields to `agent_configs` migration ← depends on: [1.1.3] - - **What:** Migration adding `heartbeat_prompt`, `heartbeat_enabled`, `heartbeat_interval`, `heartbeat_active_start`, `heartbeat_active_end`, `heartbeat_timezone` to `agent_configs` table - - **Why:** Agents need configurable heartbeat settings. 
OpenClaw stores this in HEARTBEAT.md; we use DB fields for admin UI editability. - -- [ ] **3.11.2** Create `HeartbeatJob` ← depends on: [3.11.1, 3.1.1] - - **What:** Queue job that runs an agent's heartbeat check: loads prompt, calls AI SDK, posts results to channel (or skips if ack-only) - - **Why:** This is the core heartbeat execution. Adapted from OpenClaw's heartbeat-runner.ts. - - Active hours gating via `between()` check - - Ack suppression for responses under 30 chars or containing `HEARTBEAT_OK` - -- [ ] **3.11.3** Wire scheduler to dispatch heartbeats ← depends on: [3.11.2] - - **What:** Add scheduler entry in `app/Console/Kernel.php` that queries active agents with heartbeat enabled, dispatches `HeartbeatJob` for each - - **Why:** Replaces OpenClaw's Node.js setInterval with Laravel's built-in scheduler. - - Default interval: every 30 minutes (configurable per agent via `heartbeat_interval`) - -- [ ] **3.11.4** Add heartbeat configuration to agent admin UI ← depends on: [3.11.1, 4.1.x] - - **What:** Add heartbeat settings section to Agent/Show.vue Settings tab: enable toggle, prompt textarea, interval select, active hours inputs - - **Why:** Admins need to configure heartbeat behavior per agent without touching the database directly. - ---- - -### 3.12 Agent Execution Loop (Core Agent Brain) - -> **This is the most critical phase.** Without this, agents cannot process messages or execute tasks. All other agent features (memory, heartbeat, sub-agents) depend on this. - -- [x] **3.12.1** Create `AgentPromptBuilder` service ← depends on: [2.1.x, 3.1.1] — ✅ superseded: system prompt assembly built into OpenCompanyAgent using Document-based identity files - - **What:** Service that assembles the system prompt from agent config fields (personality, instructions), user context, tool documentation, and memory. Follows OpenClaw's injection order: identity → personality → user → instructions → tools → memory. 
- - **Why:** Clean separation of prompt assembly from execution. Handles sub-agent restrictions (only instructions, no personality/user context). - -- [x] **3.12.2** Create `AgentToolExecutor` service ← depends on: [3.5.1.x] — ✅ superseded: tool resolution handled by ToolRegistry.getToolsForAgent() + AgentPermissionService - - **What:** Service that resolves available tools for an agent (based on capabilities/permissions), executes tool calls from LLM responses, and returns results. - - **Why:** Adapted from OpenClaw's tool execution loop. Handles the tool call → result → feed back cycle. - - Tool resolution follows permission stack: profile → allow/deny → agent-specific restrictions - -- [x] **3.12.3** Create `ProcessAgentMessageJob` ← depends on: [3.12.1, 3.12.2] — ✅ implemented as AgentRespondJob (core agent brain) - - **What:** The core agent runner job. Dispatched when an agent is mentioned or receives a DM. Loads context, builds prompt, calls AI SDK with streaming, processes tool calls, stores response, broadcasts via Reverb. - - **Why:** This is the "agent brain" — the single most important piece of the system. Replaces OpenClaw's `runEmbeddedPiAgent()`. - - Queue: `agent-{id}` (serialized per agent to prevent race conditions) - - Includes: conversation history loading, streaming response broadcast, post-processing (memory indexing, compaction check) - -- [x] **3.12.4** Wire message controller to dispatch agent runs ← depends on: [3.12.3] — ✅ MessageController dispatches AgentRespondJob on @mention and DM - - **What:** Update `MessageController::store()` to detect @mentions of agents and dispatch `ProcessAgentMessageJob`. Also handle DM channels where the other participant is an agent. - - **Why:** This is the trigger that makes agents respond to messages. 
- - Detection: check message content for @mentions matching agent names, or check if channel is a DM with an agent member - -- [x] **3.12.5** Add response streaming via Reverb ← depends on: [3.12.3] — ✅ streaming via Reverb WebSocket (MessageSent, AgentStatusUpdated, TypingIndicator events) - - **What:** Create `AgentTyping` broadcast event for partial response streaming. Clients receive chunks as the agent generates them, showing real-time typing. - - **Why:** UX requirement — users should see agents "typing" in real-time, not wait for complete responses. - - Broadcast on channel: `channel.{id}` - - Event data: `{ agentId, chunk, isComplete }` - -- [ ] **3.12.6** Add model failover support ← depends on: [3.12.3] - - **What:** Configure primary + fallback models per agent in `agent_configs`. `ProcessAgentMessageJob` tries primary first, falls back to alternatives on failure. - - **Why:** Adapted from OpenClaw's failover chain. Ensures agents stay operational if a provider has an outage. 
- - Config: `model_primary`, `model_fallbacks` (JSON array) on agent_configs - ---- - -## Verification Checklist - -### Functional Verification -- [x] Navigate to `/agent/{id}` - page loads without errors — ✅ Agent/Show.vue with Inertia route -- [x] All 7 tabs render correctly (Overview, Personality, Instructions, Capabilities, Memory, Activity, Settings) — ✅ tabs: Overview, Tasks, Identity, Capabilities, Activity, Settings -- [ ] Edit personality → saves to database → persists on refresh -- [ ] Edit instructions → saves to database → persists on refresh -- [ ] Toggle capability → saves to database → persists on refresh -- [ ] Add memory entry → appears in list → persists on refresh -- [ ] Delete memory entry → removed from list -- [ ] Change settings → saves to database → persists on refresh -- [ ] Start new session → creates new session → clears context -- [ ] Pause agent → status changes → agent stops working -- [ ] Resume agent → status changes → agent can work again - -### UI Verification -- [ ] Dark mode works on all components -- [ ] Loading states show skeleton placeholders -- [ ] Error states show appropriate messages -- [ ] Mobile responsive layout works -- [ ] Markdown preview renders correctly -- [ ] Context usage progress bar updates - -### Data Integrity -- [ ] Agent config belongs to correct user -- [ ] Session messages ordered by timestamp -- [ ] Memory entries have correct categories -- [ ] Settings have valid enum values - -### OpenClaw Features Verification -- [ ] Pre-compaction flush runs before reaching reserve threshold -- [ ] Tool kinds correctly inferred and affect approval logic -- [ ] Allowlist patterns matched and tracked (last_used_at updates) -- [ ] Session pruning triggers after TTL expires -- [ ] NO_REPLY messages suppressed from UI -- [ ] Hybrid search returns relevant results (vector + FTS) -- [ ] Embedding cache prevents duplicate API calls -- [ ] Reserve tokens enforced during compaction -- [ ] Security modes work correctly 
(deny/allowlist/full) -- [ ] Ask modes work correctly (off/on-miss/always) - ---- - -## File Summary - -### Packages to Install -```bash -# Required -composer require laravel/ai - -# Optional -composer require laravel/mcp # MCP server for external AI clients -composer require laravel/horizon # Queue monitoring dashboard -``` - -### Migrations to Create (17 files) -``` -database/migrations/ -├── xxxx_create_agent_configurations_table.php -├── xxxx_create_capabilities_table.php -├── xxxx_create_agent_capabilities_table.php -├── xxxx_create_agent_settings_table.php # Includes OpenClaw fields -├── xxxx_create_agent_sessions_table.php # Includes OpenClaw fields -├── xxxx_create_agent_session_messages_table.php # Includes OpenClaw fields -├── xxxx_create_agent_memories_table.php -├── xxxx_create_agent_memory_daily_logs_table.php -├── xxxx_create_subagent_spawn_permissions_table.php -├── xxxx_create_subagent_runs_table.php -├── xxxx_create_agent_tool_allowlist_table.php # OpenClaw -├── xxxx_create_memory_chunks_table.php # OpenClaw -├── xxxx_create_embedding_cache_table.php # OpenClaw -├── xxxx_create_plugins_table.php # Plugin system -├── xxxx_create_connected_devices_table.php # Multi-device -├── xxxx_create_agent_cron_jobs_table.php # Cron system -└── xxxx_create_cron_job_history_table.php # Cron history -``` - -### Models to Create (18 files) -``` -app/Models/ -├── AgentConfiguration.php -├── Capability.php -├── AgentCapability.php -├── AgentSettings.php -├── AgentSession.php -├── AgentSessionMessage.php -├── AgentMemory.php -├── AgentMemoryDailyLog.php -├── SubagentSpawnPermission.php -├── SubagentRun.php -├── AgentToolAllowlist.php # OpenClaw -├── MemoryChunk.php # OpenClaw -├── EmbeddingCache.php # OpenClaw -├── MemoryCollection.php # QMD collections -├── Plugin.php # Plugin system -├── ConnectedDevice.php # Multi-device -├── AgentCronJob.php # Cron system -└── CronJobHistory.php # Cron history -``` - -### Controllers to Create (13 files) -``` 
-app/Http/Controllers/Api/ -├── AgentConfigurationController.php -├── AgentCapabilityController.php -├── CapabilityController.php -├── AgentSettingsController.php -├── AgentSessionController.php -├── AgentMemoryController.php -├── SubagentController.php -├── MemorySearchController.php # OpenClaw -├── MemoryCollectionController.php # QMD collection management -├── AllowlistController.php # OpenClaw -├── PluginController.php # Plugin system -├── ConnectedDeviceController.php # Multi-device -└── AgentCronJobController.php # Cron system -``` - -### Agent + Tools to Create -``` -app/Agents/ -├── OpenCompanyAgent.php # Single dynamic agent class -├── DynamicProviderResolver.php # Resolves provider from IntegrationSetting -├── ToolRegistry.php # Maps DB capabilities to tool classes -└── Tools/ - ├── Internal/ # Workspace tools - │ ├── SearchDocuments.php - │ ├── ReadDocument.php - │ ├── UpdateDocument.php - │ ├── CreateListItem.php - │ ├── UpdateListItem.php - │ ├── SendMessage.php - │ ├── CreateTaskStep.php - │ ├── CreateApproval.php - │ └── QueryDataTable.php - ├── External/ # SDK built-in wrappers - │ ├── WebSearch.php - │ └── WebFetch.php - └── Memory/ # Memory tools - ├── SaveMemory.php - └── RecallMemory.php -``` - -### Agent Jobs to Create (8 files) -``` -app/Jobs/Agent/ -├── FetchAgentConfigJob.php -├── ExecuteAgentJob.php -├── CreateApprovalRequestJob.php -├── ExecuteApprovedActionJob.php -├── SaveSessionMessageJob.php -├── MemoryFlushJob.php # OpenClaw -├── PruneSessionJob.php # OpenClaw -└── CheckMemoryFlushJob.php # OpenClaw -``` - -### Agent Services to Create (3 files) -``` -app/Services/Agent/ -├── AgentTaskService.php -├── AgentSessionResetService.php -└── SubagentSpawnService.php -``` - -### Services to Create (18 files) -``` -app/Services/ -├── AgentToolRegistry.php -├── AgentPromptBuilder.php # System prompt assembly (OpenClaw workspace files mapping) -├── AgentToolExecutor.php # Tool resolution + execution loop -├── ContextWindowGuard.php # OpenClaw 
-├── MemoryFlushService.php # OpenClaw -├── SessionPruningService.php # OpenClaw -├── ToolKindClassifier.php # OpenClaw -├── ExecutionApprovalService.php # OpenClaw -├── EmbeddingService.php # OpenClaw -├── EmbeddingCacheService.php # OpenClaw -├── ChunkingService.php # OpenClaw -├── MemoryIndexService.php # OpenClaw -├── HybridMemorySearch.php # OpenClaw -├── HybridDocumentSearch.php # QMD-enhanced hybrid search -├── MemorySearchScopeGuard.php # QMD scope rules -├── PluginRegistryService.php # Plugin system -├── DeviceRouter.php # Multi-device -└── CronExecutionService.php # Cron system -``` - -### Frontend Files to Update (2 files) -``` -resources/js/ -├── composables/useApi.ts (add ~30 new methods) -└── Pages/Agent/Show.vue (replace mocks with API calls) -``` - -### Frontend Components to Create (3 files - OpenClaw) -``` -resources/js/Components/agents/ -├── AllowlistManager.vue -├── MemorySearchInput.vue -└── SecurityModeSelector.vue -``` - -### Seeders to Create (3 files) -``` -database/seeders/ -├── CapabilitySeeder.php -├── AgentConfigurationSeeder.php -└── AgentSettingsSeeder.php -``` - -### Jobs to Create -``` -app/Jobs/ -├── IndexAgentMemoryJob.php # Index single document into memory_chunks -├── ExportSessionTranscriptJob.php # Export session messages to markdown document -├── PeriodicReindexJob.php # Scheduled re-index (every 5 minutes) -├── EmbeddingRefreshJob.php # Scheduled embedding refresh (hourly) -├── ReindexAgentJob.php # Full agent re-index on demand -├── HeartbeatJob.php # Periodic agent heartbeat check -└── ProcessAgentMessageJob.php # Core agent brain — message processing + tool execution -``` - -### Config Files to Create -``` -config/ -└── memory.php # QMD search parameters & indexing config -``` - ---- - -## Implementation Priority Order - -**Day 1: Package Setup** -0. Install & configure packages (0.1.1 - 0.2.2) - -**Week 1: Foundation** -1. Database migrations (1.1.1 - 1.4.3) -2. 
Memory search infrastructure (1.5.1 - 1.5.4) ← includes QMD collections & clamping -3. Core models (2.1.1 - 2.4.2) - -**Week 2: API Layer** -4. Controllers (3.1.1 - 3.7.1) -5. Seeders (6.1.1 - 6.2.2) - -**Week 3: Agent Execution Jobs** -6. AI Tools (3.5.1.1 - 3.5.1.3) -7. Agent Jobs (3.5.2.1 - 3.5.2.10) -8. Agent Services (3.5.3.1 - 3.5.3.4) -9. Queue Infrastructure (3.5.4.1 - 3.5.4.3) - -**Week 4: Context Management (OpenClaw)** -10. Context Window Guard (3.6.1) -11. Pre-Compaction Memory Flush (3.6.2) -12. Session Pruning (3.6.3) -13. Tool Kind Classification (3.6.4) -14. Execution Approval System (3.6.5) - -**Week 5-6: Hybrid Memory Search + QMD Features (OpenClaw)** -15. Embedding Service (3.7.1) -16. Chunking Service (3.7.2) -17. Memory Indexing (3.7.3) -18. Hybrid Search (3.7.4) -19. Collection Management (3.7.5) -20. Session Transcript Indexing (3.7.6) -21. Periodic Re-Indexing (3.7.7) -22. Scope Rules & Security (3.7.8) -23. Enhanced Search with QMD Features (3.7.9) - -**Week 7: Frontend Integration** -24. useApi methods (4.1.1 - 4.1.8) -25. Component connections (4.2.1 - 4.2.9) -26. Page updates (4.3.1 - 4.3.2) - -**Week 8: Heartbeat System** -27. Heartbeat migration (3.11.1) -28. HeartbeatJob (3.11.2) -29. Scheduler wiring (3.11.3) -30. Heartbeat admin UI (3.11.4) - -**Week 9: Agent Brain** -31. AgentPromptBuilder service (3.12.1) -32. AgentToolExecutor service (3.12.2) -33. ProcessAgentMessageJob (3.12.3) -34. Wire message controller (3.12.4) -35. Response streaming via Reverb (3.12.5) -36. Model failover support (3.12.6) - -**Week 10: Polish & Testing** -37. Agent control actions (5.1.1 - 5.2.2) -38. Testing (7.1.1 - 7.2.7) - -**Post-MVP: Enhancements** -39. Subagent spawning (8.3.x) -40. Skills system (8.4.x) -41. Webhooks (8.5.x) -42. Plugin system (3.8.x) -43. Multi-device support (3.9.x) -44. Cron & scheduled tasks (3.10.x) - ---- - -## Status Update (February 2026) - -> **This section reflects what has actually been built vs. 
what remains from the original plan above. Many phases were implemented organically and differ from the original spec — some items were superseded, others were built differently.** - -### What's Been Built (Completed) - -The following are **done and working** — these can be checked off from the phases above: - -#### Agent Execution Engine (supersedes Phase 3.12) -- [x] `OpenCompanyAgent` — single dynamic agent class with identity file-based system prompts -- [x] `AgentRespondJob` — core agent response lifecycle (LLM call → response → task completion) -- [x] `ExecuteAgentTaskJob` — queue job for agent task execution -- [x] `AgentResumeFromSleepJob` — wake sleeping agents -- [x] `DynamicProviderResolver` — resolves LLM provider/model from `brain` field + IntegrationSettings -- [x] `ChannelConversationLoader` — loads conversation history for agent context -- [x] `AgentChatService` — orchestrates agent chat interactions -- [x] Message controller dispatches agent runs on @mention and DM -- [x] Response streaming via Reverb WebSocket - -#### Agent Tools (supersedes Phase 3.5.1) -- [x] `ToolRegistry` — maps agent permissions to tool class instances (33 tools total) -- [x] `ApprovalWrappedTool` — wraps tools that require approval -- [x] Workspace tools: `SearchDocuments`, `ManageDocument`, `CommentOnDocument` -- [x] Messaging tools: `SendChannelMessage`, `ManageMessage`, `ReadChannel`, `ListChannels` -- [x] List tools: `ManageListItem`, `QueryListItems`, `ManageListStatus` -- [x] Task tools: `CreateTaskStep`, `UpdateCurrentTask` -- [x] Table tools: `ManageTable`, `ManageTableRows`, `QueryTable` -- [x] Calendar tools: `ManageCalendarEvent`, `QueryCalendar` -- [x] Approval tools: `WaitForApproval`, `Wait` -- [x] Integration tools: `SendTelegramNotification`, Plausible suite (8 tools) -- [x] Creative tools: `CreateJpGraphChart`, `RenderSvg` -- [x] Meta tools: `GetToolInfo` - -#### Agent Identity System (supersedes Phase 1.1.1, 2.1.1, 3.1.1) -- [x] Document-based identity (8 
`.md` files per agent: IDENTITY, SOUL, USER, AGENTS, TOOLS, MEMORY, HEARTBEAT, BOOTSTRAP) -- [x] `AgentDocumentService` — creates/manages identity file structure per agent -- [x] Identity files API (`GET/PUT /api/agents/{id}/identity-files/{type}`) -- [x] `AgentIdentityFiles.vue` — OpenClaw-style two-panel editor for all 8 files -- [x] BOOTSTRAP.md auto-clear after first agent interaction (`bootstrapped_at` tracking) - -#### Agent Permissions (supersedes Phase 1.1.2-1.1.3, 3.2.1) -- [x] `AgentPermission` model — unified scope-based permissions (tool, channel, folder) -- [x] `AgentPermissionService` — resolves enabled tools, channels, folders, integrations -- [x] `AgentPermissionController` — API for managing all permission types -- [x] UI: tool toggles, channel access, folder access, integration toggles on Agent/Show.vue - -#### Agent Configuration & Settings (partial supersede of Phase 1.1.4, 3.3.1) -- [x] `behavior_mode` on User model (autonomous/supervised/strict) -- [x] `must_wait_for_approval` flag -- [x] `brain` field (provider:model format) with validation -- [x] `sleeping_until` / `sleeping_reason` for sleep/wake cycle -- [x] Settings tab in Agent/Show.vue - -#### Frontend — Agent Detail Page (supersedes Phase 4) -- [x] `Agent/Show.vue` — full agent detail page with tabs: Overview, Tasks, Identity, Capabilities, Activity, Settings -- [x] Real API data (not mocks) for all sections -- [x] `AgentCapabilities.vue` — tool toggles with app grouping -- [x] `AgentSettingsPanel.vue` — behavior mode, brain selector, delete agent -- [x] Task list with step tracking - -#### Core Platform (all working) -- [x] Chat with channels, DMs, threads, reactions, attachments -- [x] Documents with versioning, comments, attachments, folder tree -- [x] Lists (kanban) with custom statuses, templates, automation rules -- [x] Tasks (agent work items) with steps, lifecycle, assignment -- [x] Calendar with recurrence, attendees, iCal feeds, import -- [x] Data Tables with 10 column types, 
4 view modes, bulk operations -- [x] Approvals with Telegram forwarding -- [x] Activity feed, notifications, search -- [x] Integrations system (Telegram, Plausible configured) -- [x] Auth (login, register, password reset) - -### What Was Dropped / Superseded - -These items from the original plan are **no longer needed**: - -- ~~`agent_configurations` table~~ → superseded by Document-based identity files -- ~~`agent_settings` table~~ → superseded by fields on `users` table -- ~~`capabilities` table~~ → superseded by `agent_permissions` + `ToolRegistry` -- ~~`stats` table~~ → `StatsController` computes everything dynamically -- ~~`AgentConfiguration` model~~ → deleted (cleanup commit 33a0147) -- ~~`AgentSettings` model~~ → deleted -- ~~`Capability` model~~ → deleted -- ~~`Stat` model~~ → deleted -- ~~`AgentPersonalityEditor.vue`~~ → replaced by `AgentIdentityFiles.vue` -- ~~`AgentInstructionsEditor.vue`~~ → replaced by `AgentIdentityFiles.vue` -- ~~`AgentMemoryView.vue`~~ → replaced by MEMORY.md in identity files -- ~~`CapabilitySeeder`~~ → deleted -- ~~Phase 4.2.1-4.2.4~~ → components replaced by identity file editor - ---- - -## Next Up: Priority Implementation Queue - -> **Ordered by impact vs effort. Each item is a self-contained project.** - -### N1. Sub-Agent Spawning -**Impact:** HIGH | **Effort:** MEDIUM | **Priority:** 1 - -The core "Robo-Company" differentiator. A manager agent spawns worker agents into temporary channels, tracks their work, aggregates results. Foundation already exists (`manager_id` column, `directReports()` relationship). 
- -- [ ] **N1.1** Create `subagent_spawn_permissions` migration - - Fields: `id`, `parent_agent_id` (FK), `allowed_agents` (JSON), `max_concurrent`, `auto_archive_minutes` -- [ ] **N1.2** Create `subagent_runs` migration - - Fields: `id`, `parent_agent_id`, `child_agent_id`, `task_description`, `label`, `status` (pending/running/success/error/timeout/cancelled), `runtime_config` (JSON), `result` (JSON), `created_at`, `completed_at` -- [ ] **N1.3** Create `SubagentSpawnPermission` and `SubagentRun` models -- [ ] **N1.4** Create `SubagentSpawnService` - - Enforce spawn permissions (allowed_agents, max_concurrent) - - Create ephemeral channel for parent↔child communication - - Dispatch child agent task via queue - - Track parent-child relationship in `subagent_runs` - - Handle timeout and cancellation -- [ ] **N1.5** Create `SpawnSubagent` agent tool - - Allows manager agents to spawn workers via tool call - - Parameters: child_agent_id, task_description, timeout_minutes - - Returns run ID for tracking -- [ ] **N1.6** Create `SubagentController` API - - `GET /api/agents/{id}/spawn-permissions` — get spawn permissions - - `PUT /api/agents/{id}/spawn-permissions` — update permissions - - `POST /api/agents/{id}/spawn` — spawn subagent - - `GET /api/agents/{id}/subagent-runs` — list runs - - `POST /api/subagent-runs/{id}/cancel` — cancel running subagent -- [ ] **N1.7** Frontend: spawn dialog, running subagents list, result announcements - - Spawn button on agent page (disabled if no spawn permissions) - - Real-time status updates via WebSocket - - Result announcement in parent agent's chat -- [ ] **N1.8** Add spawn permissions UI to Agent/Show.vue Settings tab - -### N2. MCP Server -**Impact:** HIGH | **Effort:** LOW-MEDIUM | **Priority:** 2 - -Expose OpenCompany as an MCP server so external AI tools (Claude Desktop, Cursor, VS Code Copilot) can interact with the workspace. High developer appeal and unique positioning. 
- -- [ ] **N2.1** Install `laravel/mcp` package - ```bash - composer require laravel/mcp - ``` -- [ ] **N2.2** Create MCP server configuration - - Define available resources: documents, channels, tasks, list items, agents - - Define available tools: search_documents, create_task, send_message, create_list_item, query_table -- [ ] **N2.3** Create MCP tool implementations - - `SearchDocuments` — search workspace documents - - `ReadDocument` — read a specific document - - `CreateListItem` — create kanban items - - `SendMessage` — send messages to channels - - `CreateTask` — create agent tasks - - `QueryTable` — query data tables -- [ ] **N2.4** Create MCP resource providers - - Documents resource (list, read) - - Channels resource (list, read messages) - - Agents resource (list, status) - - Tasks resource (list, read) -- [ ] **N2.5** Add authentication (API token-based) -- [ ] **N2.6** Add MCP server settings to Settings page - - Enable/disable MCP server - - Generate/revoke API tokens - - Show connection URL for clients - -### N3. Memory & Vector Search (Hybrid) -**Impact:** HIGH | **Effort:** HIGH | **Priority:** 3 - -Agents currently have no semantic memory beyond plain MEMORY.md text. Adding pgvector + hybrid search enables agents to recall past conversations and learnings by meaning, not just keywords. 
- -- [ ] **N3.1** Install pgvector extension - ```sql - CREATE EXTENSION IF NOT EXISTS vector; - ``` -- [ ] **N3.2** Create `memory_chunks` migration - - Fields: `id`, `agent_id`, `source_type` (identity/memory/session), `source_id`, `document_id` (FK), `start_line`, `end_line`, `content_hash`, `text`, `embedding` VECTOR(1536) -- [ ] **N3.3** Create `embedding_cache` migration - - Fields: `provider`, `model`, `content_hash`, `embedding` VECTOR(1536), `dims` - - Primary key on (provider, model, content_hash) -- [ ] **N3.4** Create `EmbeddingService` - - Generate embeddings via OpenAI text-embedding-3-small (or configured provider) - - Batch mode for multiple texts - - Cache layer using `embedding_cache` table -- [ ] **N3.5** Create `ChunkingService` - - Split long texts into ~400 token chunks with 80 token overlap - - Track start/end line numbers - - Content hashing for change detection -- [ ] **N3.6** Create `MemoryIndexService` - - `indexDocument($agentId, $docId)` — chunk + embed single document - - `reindexAgent($agentId)` — full reindex - - Background job dispatch for async indexing -- [ ] **N3.7** Create `HybridMemorySearch` service - - Vector similarity via pgvector `<=>` operator - - Full-text search via `ts_rank` + `to_tsvector` - - Combined scoring: 0.7 vector + 0.3 text - - Result clamping: max 6 results, 700 chars per snippet, 4000 chars total -- [ ] **N3.8** Create `RecallMemory` agent tool - - Allows agents to search their own memory semantically - - Parameters: query, limit, collection (optional) - - Returns ranked results with source citations -- [ ] **N3.9** Create `MemorySearchController` API - - `POST /api/agents/{id}/memory/search` — search agent memory -- [ ] **N3.10** Create `IndexAgentMemoryJob` + `PeriodicReindexJob` - - Index on document create/update (via model observer) - - Periodic reindex every 5 minutes (delta-based) - - Embedding refresh hourly -- [ ] **N3.11** Add Document model observer for auto-indexing - - Trigger on 
identity/memory document changes - - 15-second debounced dispatch -- [ ] **N3.12** Frontend: `MemorySearchInput.vue` component - - Search input with debounced API calls - - Show matched chunks with source references - -### N4. Test Suite Foundation -**Impact:** MEDIUM | **Effort:** MEDIUM | **Priority:** 4 - -0% test coverage is a risk. Set up PHPUnit feature tests for the most critical API endpoints and establish patterns for future tests. - -- [ ] **N4.1** Configure test environment - - SQLite in-memory for speed - - Test factories for User, Channel, Message, Document, Task, ListItem - - Base test case with auth helpers -- [ ] **N4.2** Create model factories - - `UserFactory` (human + agent variants) - - `ChannelFactory` (public, private, dm) - - `MessageFactory` - - `DocumentFactory` (file + folder) - - `TaskFactory` + `TaskStepFactory` - - `ListItemFactory` + `ListStatusFactory` - - `CalendarEventFactory` - - `DataTableFactory` + `DataTableColumnFactory` + `DataTableRowFactory` -- [x] **N4.3** Agent API tests — ✅ 20+ test files exist in tests/Feature/ and tests/Feature/Tools/ - - `AgentControllerTest` — CRUD agents, identity files, show endpoint - - `AgentPermissionControllerTest` — tool/channel/folder permission toggles - - `AgentChatServiceTest` — message dispatch triggers agent response -- [ ] **N4.4** Core API tests - - `ChannelControllerTest` — CRUD, members, read markers - - `MessageControllerTest` — CRUD, reactions, threads, attachments - - `DocumentControllerTest` — CRUD, versions, comments, folder tree - - `ListItemControllerTest` — CRUD, reorder, status changes - - `TaskControllerTest` — CRUD, lifecycle (start/complete/fail), steps -- [ ] **N4.5** Calendar & Table API tests - - `CalendarEventControllerTest` — CRUD, recurrence, attendees, feeds - - `DataTableControllerTest` — CRUD, columns, rows, bulk operations -- [ ] **N4.6** Integration tests - - `ApprovalFlowTest` — create approval → approve/reject → agent resumes - - `AgentToolExecutionTest` — 
agent uses tools correctly -- [ ] **N4.7** Set up CI pipeline (GitHub Actions) - - Run tests on push/PR - - Report coverage - -### N5. Quick Wins & Polish -**Impact:** VISIBLE | **Effort:** LOW | **Priority:** 5 - -Small changes that immediately improve the demo experience and align code with documentation. - -- [ ] **N5.1** Seed a `coordinator` agent in `UserSeeder` - - All 7 TypeScript agent types now demonstrated -- [ ] **N5.2** Seed a `private` channel in `ChannelSeeder` - - All channel types visible in demos -- [ ] **N5.3** Add `TaskStep` records to `AgentTaskSeeder` - - Task detail view shows step tracking (action, decision, approval steps) -- [ ] **N5.4** Align `ExternalChannelProvider` type with reality - - Only list implemented providers (telegram, slack) — remove or comment out others -- [ ] **N5.5** Add Data Tables section to `features.md` - - Major built feature gets marketing visibility -- [ ] **N5.6** Add Calendar section to `features.md` - - Built feature gets marketing visibility -- [ ] **N5.7** Update `emergent.md` risk assessment - - Agent execution engine is now built (was listed as CRITICAL gap) - - Update "No Agent Execution Engine" section to reflect current state -- [ ] **N5.8** Rename automation triggers for clarity - - `task_created` → `list_item_created` - - `assign_task` → `assign_list_item` - - Aligns with the Tasks vs ListItems naming convention - -### N6. External Channel: Discord -**Impact:** MEDIUM | **Effort:** MEDIUM | **Priority:** 6 - -Prove the external channel architecture scales beyond Telegram. Discord is where the AI/developer community lives. 
- -- [ ] **N6.1** Create `DiscordService` (similar to `TelegramService`) - - Bot token management - - Send/receive messages via Discord API - - Channel mapping (Discord channel ↔ OpenCompany channel) -- [ ] **N6.2** Create `DiscordWebhookController` - - Receive Discord gateway events - - Route messages to appropriate channels - - Handle Discord-specific formatting (embeds, mentions) -- [ ] **N6.3** Create Discord integration settings - - Add to `IntegrationSeeder` — bot token, guild ID, channel mappings - - Add Discord configuration UI to Integrations page -- [ ] **N6.4** Create `SendDiscordNotification` agent tool - - Similar to `SendTelegramNotification` - - Support Discord embeds for rich formatting -- [ ] **N6.5** Update `ExternalChannelProvider` type - - Add `discord` to TypeScript union type - - Update channel creation flow to support Discord channels -- [ ] **N6.6** Test bidirectional message flow - - Message in Discord → appears in OpenCompany channel - - Agent response in OpenCompany → appears in Discord diff --git a/docs/planning/kosmokrator-runtime-alignment-checklist.md b/docs/planning/kosmokrator-runtime-alignment-checklist.md new file mode 100644 index 0000000..8d55f21 --- /dev/null +++ b/docs/planning/kosmokrator-runtime-alignment-checklist.md @@ -0,0 +1,49 @@ +# KosmoKrator Runtime Alignment Checklist + +> Implementation checklist for aligning OpenCompany's context, compaction, pruning, and prompt-caching runtime with the reusable parts of KosmoKrator and `prism-relay`. + +## Status + +Phases 1-5 are largely complete. 
Remaining open items are tracked in the [OpenCompany Plane project](https://plane.gingermedia.biz/kosmokrator/projects/ceaf5d22-612a-42bf-9cc8-0dac054cdf0c/issues/): + +| Issue | Open item | Phase | +|-------|-----------|-------| +| OC-17 | Remove duplicated built-in context-window assumptions | 1 | +| OC-18 | ContextPruner: protect recent user turns and already-truncated entries | 3 | +| OC-19 | CompactionPlan: preserve protected context during compaction | 4 | +| OC-20 | Missing test coverage: compaction plan building + memory extraction | 4 | +| OC-48 | Prompt cache metrics not wired to token metrics path | 5 | + +## Completed work + +### Phase 1 — Relay-Backed Context Windows (partial) +- Refactored `ModelContextRegistry` as relay-backed adapter +- Reads defaults from `OpenCompany\PrismRelay\Meta\ProviderMeta` +- `AppSetting` overrides as top-priority layer +- Callers pass provider + model +- Tests: relay exact match, admin override precedence, unknown model fallback + +### Phase 2 — Shared Context Budget (complete) +- `ContextBudget.php` centralizes all threshold math +- Consumed by `ConversationCompactionService`, `MemoryFlushService`, `AgentRespondJob` + +### Phase 3 — Context Pruning (partial) +- `ContextPruner.php` scoped to OpenCompany `read` tools +- Integrated into checkpoint/history loading path +- Minimum savings threshold enforced + +### Phase 4 — Compaction Pipeline (partial) +- `CompactionPlan.php` + `CompactionMemoryExtractor.php` +- Failure counting / circuit breaker +- Durable memory extraction from summaries + +### Phase 5 — Prompt Cache Planning (partial) +- Prompt splitting in `OpenCompanyAgent` +- Extended Laravel AI Prism for multiple system prompts +- Split prompts fed to `Relay::planPromptCache()` + +## Notes + +- Prompt splitting alone is not the same as provider-side prompt caching. +- `prism-relay` contains the planner/orchestrator; OpenCompany mainly needs to use it on its real request path. 
+- OpenCompany pruning rules are based on OpenCompany tools, not KosmoKrator shell tools. diff --git a/docs/planning/memory-implementation.md b/docs/planning/memory-implementation.md index f6d4d6b..b252a45 100644 --- a/docs/planning/memory-implementation.md +++ b/docs/planning/memory-implementation.md @@ -1,10 +1,9 @@ -# Memory, Compaction & Embeddings — Implementation Guide +# Memory, Compaction & Embeddings — Architecture Reference -> Phased implementation plan for agent memory, document embeddings, conversation compaction, and hybrid search. Each phase is a standalone PR-able unit. Later phases depend on earlier ones. +> Architecture overview for agent memory, document embeddings, conversation compaction, and hybrid search. **Status**: Complete (all 6 phases implemented) **Config**: `config/memory.php` (comprehensive: embedding, chunking, search, reranking, compaction, memory_flush, context_windows, scope) -**Reference**: OpenClaw memory system (`inspiration/openclaw/src/memory/`, `inspiration/openclaw/docs/concepts/memory.md`; updated for v2026.2.9) ### Implementation Summary @@ -36,7 +35,7 @@ Agents have two distinct memory systems, mirroring how human memory works: - **What**: The current conversation messages loaded into the context window - **Scope**: Single channel, single session - **Lifetime**: Ephemeral — exists only while the context window holds it -- **Managed by**: `ChannelConversationLoader` (Phase 4: compaction keeps it within budget) +- **Managed by**: `ChannelConversationLoader` (compaction keeps it within budget) - **Storage**: `messages` table → loaded into context window at prompt time - **Capacity**: Limited by model context window (e.g. 
128K tokens) - **When full**: Older messages are summarized into a `ConversationSummary` and replaced @@ -46,12 +45,12 @@ Agents have two distinct memory systems, mirroring how human memory works: - **What**: Explicitly saved facts, preferences, decisions, learnings - **Scope**: Per-agent, accessible across all conversations - **Lifetime**: Permanent — persists until explicitly deleted -- **Managed by**: `SaveMemory` / `RecallMemory` tools (Phase 3) +- **Managed by**: `SaveMemory` / `RecallMemory` tools - **Storage**: `agents/*/memory/YYYY-MM-DD.md` documents → chunked & embedded in `document_chunks` - **Capacity**: Unlimited (PostgreSQL + pgvector) - **Retrieval**: Semantic search (vector similarity), not loaded by default — agents must actively recall -### The Bridge: STM → LTM Promotion (Phase 5) +### The Bridge: STM → LTM Promotion Before conversation compaction discards older messages, the **Memory Flush** gives the agent a silent turn to review what's about to be lost and `save_memory` anything important. This is the automatic promotion path from short-term to long-term memory. 
@@ -61,28 +60,25 @@ Before conversation compaction discards older messages, the **Memory Flush** giv │ │ │ ┌──────────────────┐ flush ┌──────────────────┐ │ │ │ Short-Term (STM) │ ─────────► │ Long-Term (LTM) │ │ -│ │ │ Phase 5 │ │ │ +│ │ │ │ │ │ │ │ Conversation │ │ Saved memories │ │ │ │ messages in │ │ in document_chunks│ │ │ │ context window │ │ (pgvector) │ │ │ │ │ │ │ │ │ │ Compacted when │ recall │ Recalled via │ │ │ │ approaching limit │ ◄───────── │ semantic search │ │ -│ │ (Phase 4) │ Phase 3 │ on demand │ │ +│ │ │ │ on demand │ │ │ └──────────────────┘ └──────────────────┘ │ │ │ │ ┌──────────────────────────────────────────────────┐ │ │ │ Document Knowledge Base │ │ │ │ Shared workspace docs, indexed with embeddings │ │ │ │ Searchable via SearchDocuments (semantic mode) │ │ -│ │ (Phase 2) │ │ │ └──────────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────┘ ``` ---- - -## Phase Dependencies +### Phase Dependencies ``` Phase 1 (Foundation) @@ -92,1846 +88,3 @@ Phase 1 (Foundation) └── Phase 4 (Conversation Compaction) — STM management └── Phase 5 (STM → LTM Flush) — requires Phase 3 + 4 ``` - ---- - -## Phase 1: Foundation — pgvector, Chunking & Embedding Services - -### Goal -Install pgvector, create the storage tables, and build the core chunking and embedding services that all later phases depend on. 
- -### Database - -**Migration: `create_document_chunks_table`** - -```php -// Enable pgvector extension -DB::statement('CREATE EXTENSION IF NOT EXISTS vector'); - -Schema::create('document_chunks', function (Blueprint $table) { - $table->uuid('id')->primary(); - $table->string('document_id'); // FK → documents.id - $table->text('content'); // The chunk text - $table->string('content_hash', 64); // SHA256 of content (for dedup) - $table->vector('embedding', 1536); // pgvector column - $table->string('collection')->default('general'); // 'general', 'memory', 'identity' - $table->string('agent_id')->nullable(); // Scoping: null = shared, set = agent-private - $table->integer('chunk_index')->default(0); // Position within the source document - $table->jsonb('metadata')->nullable(); // Title, path, dates, etc. - $table->timestamps(); - - $table->foreign('document_id')->references('id')->on('documents')->cascadeOnDelete(); - $table->foreign('agent_id')->references('id')->on('users')->nullOnDelete(); - - // Indexes - $table->index(['collection', 'agent_id']); - $table->index('content_hash'); - $table->index('document_id'); -}); - -// HNSW index for fast cosine similarity search -DB::statement('CREATE INDEX document_chunks_embedding_idx ON document_chunks USING hnsw (embedding vector_cosine_ops)'); -``` - -**Migration: `create_embedding_cache_table`** - -```php -Schema::create('embedding_cache', function (Blueprint $table) { - $table->string('id', 64)->primary(); // SHA256(provider + model + content) - $table->string('provider', 50); // e.g. 'openai' - $table->string('model', 100); // e.g. 
'text-embedding-3-small' - $table->vector('embedding', 1536); - $table->timestamps(); -}); -``` - -### Models - -**`app/Models/DocumentChunk.php`** - -```php -class DocumentChunk extends Model -{ - use HasUuids; - - protected $fillable = [ - 'id', 'document_id', 'content', 'content_hash', 'embedding', - 'collection', 'agent_id', 'chunk_index', 'metadata', - ]; - - protected function casts(): array - { - return [ - 'embedding' => 'array', - 'metadata' => 'array', - ]; - } - - public function document(): BelongsTo - { - return $this->belongsTo(Document::class); - } - - public function agent(): BelongsTo - { - return $this->belongsTo(User::class, 'agent_id'); - } -} -``` - -**`app/Models/EmbeddingCache.php`** - -```php -class EmbeddingCache extends Model -{ - protected $table = 'embedding_cache'; - protected $keyType = 'string'; - public $incrementing = false; - - protected $fillable = ['id', 'provider', 'model', 'embedding']; - - protected function casts(): array - { - return ['embedding' => 'array']; - } - - /** - * Generate a cache key from provider + model + content. - */ - public static function cacheKey(string $provider, string $model, string $content): string - { - return hash('sha256', "{$provider}:{$model}:{$content}"); - } -} -``` - -### Services - -**`app/Services/Memory/ChunkingService.php`** - -Splits markdown text into overlapping chunks. Configuration comes from `config('memory.chunking')`. - -```php -class ChunkingService -{ - /** - * Split text into overlapping chunks. - * - * @return array Ordered list of chunk strings - */ - public function chunk(string $text): array - { - $maxSize = config('memory.chunking.max_chunk_size', 512); // tokens approx - $overlap = config('memory.chunking.chunk_overlap', 64); - $separator = config('memory.chunking.separator', "\n\n"); - - // 1. Split on separator (paragraph breaks) - $paragraphs = array_filter(explode($separator, $text), fn ($p) => trim($p) !== ''); - - // 2. 
Greedily merge paragraphs into chunks of ~maxSize tokens - // Estimate tokens as wordcount * 1.3 (conservative for English) - $chunks = []; - $current = ''; - - foreach ($paragraphs as $para) { - $candidate = $current === '' ? $para : $current . $separator . $para; - if ($this->estimateTokens($candidate) > $maxSize && $current !== '') { - $chunks[] = trim($current); - // Overlap: keep the last ~overlap tokens worth of text - $current = $this->takeTrailing($current, $overlap) . $separator . $para; - } else { - $current = $candidate; - } - } - - if (trim($current) !== '') { - $chunks[] = trim($current); - } - - return $chunks; - } - - private function estimateTokens(string $text): int - { - return (int) ceil(str_word_count($text) * 1.3); - } - - private function takeTrailing(string $text, int $tokenCount): string - { - $words = explode(' ', $text); - $wordCount = (int) ceil($tokenCount / 1.3); - return implode(' ', array_slice($words, -$wordCount)); - } -} -``` - -**`app/Services/Memory/EmbeddingService.php`** - -Generates embeddings via OpenAI (or other providers), with a database cache layer. - -```php -class EmbeddingService -{ - /** - * Get the embedding for a single text. - * Returns from cache if available, otherwise calls the API and caches. - * - * @return array Vector of floats (1536 dimensions) - */ - public function embed(string $text): array - { - $provider = config('memory.embedding.provider', 'openai'); - $model = config('memory.embedding.model', 'text-embedding-3-small'); - - $cacheKey = EmbeddingCache::cacheKey($provider, $model, $text); - $cached = EmbeddingCache::find($cacheKey); - - if ($cached) { - return $cached->embedding; - } - - $embedding = $this->callProvider($provider, $model, $text); - - EmbeddingCache::create([ - 'id' => $cacheKey, - 'provider' => $provider, - 'model' => $model, - 'embedding' => $embedding, - ]); - - return $embedding; - } - - /** - * Batch embed multiple texts. - * Checks cache first, only calls API for uncached texts. 
- * - * @return array> Embeddings in the same order as input - */ - public function embedBatch(array $texts): array - { - $provider = config('memory.embedding.provider', 'openai'); - $model = config('memory.embedding.model', 'text-embedding-3-small'); - - $results = []; - $uncached = []; - - foreach ($texts as $i => $text) { - $cacheKey = EmbeddingCache::cacheKey($provider, $model, $text); - $cached = EmbeddingCache::find($cacheKey); - if ($cached) { - $results[$i] = $cached->embedding; - } else { - $uncached[$i] = $text; - } - } - - if (!empty($uncached)) { - $batchEmbeddings = $this->callProviderBatch($provider, $model, array_values($uncached)); - $j = 0; - foreach ($uncached as $i => $text) { - $embedding = $batchEmbeddings[$j++]; - $results[$i] = $embedding; - - EmbeddingCache::create([ - 'id' => EmbeddingCache::cacheKey($provider, $model, $text), - 'provider' => $provider, - 'model' => $model, - 'embedding' => $embedding, - ]); - } - } - - ksort($results); - return $results; - } - - private function callProvider(string $provider, string $model, string $text): array - { - // OpenAI embeddings API call - // Use Laravel HTTP client: Http::withToken(config('services.openai.api_key')) - // ->post('https://api.openai.com/v1/embeddings', [...]) - // Return the embedding vector - } - - private function callProviderBatch(string $provider, string $model, array $texts): array - { - // OpenAI supports batch in a single request (up to 2048 inputs) - // Return array of embedding vectors - } -} -``` - -### Artisan Command - -**`app/Console/Commands/MemoryStatus.php`** - -```bash -php artisan memory:status -``` - -Displays: -- pgvector extension installed (yes/no) -- `document_chunks` row count, broken down by collection -- `embedding_cache` row count -- Config values from `config/memory.php` - -### Config Updates - -No changes needed --- `config/memory.php` already has the correct structure: -- `embedding.provider` = `openai` -- `embedding.model` = `text-embedding-3-small` 
-- `embedding.dimensions` = `1536` -- `chunking.max_chunk_size` = `512` -- `chunking.chunk_overlap` = `64` -- `chunking.separator` = `\n\n` - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `ChunkingServiceTest` | Paragraphs split correctly, overlap works, empty input, single paragraph | -| `EmbeddingServiceTest` | Cache hit returns cached, cache miss calls API & caches, batch embedding | -| `MemoryStatusCommandTest` | Command runs without error, outputs expected info | -| Migration test | Tables created with correct columns, pgvector extension enabled | - -### Files to Create - -``` -database/migrations/YYYY_MM_DD_000001_create_document_chunks_table.php -database/migrations/YYYY_MM_DD_000002_create_embedding_cache_table.php -app/Models/DocumentChunk.php -app/Models/EmbeddingCache.php -app/Services/Memory/ChunkingService.php -app/Services/Memory/EmbeddingService.php -app/Console/Commands/MemoryStatus.php -tests/Feature/Services/Memory/ChunkingServiceTest.php -tests/Feature/Services/Memory/EmbeddingServiceTest.php -``` - -### Design Notes - -> **v2026.2.9 alignment:** OpenClaw changed batch embeddings to disabled by default (opt-in for backfills). Our `EmbeddingService.embedBatch()` remains available for `DocumentIndexingService.index()` in Phase 2 and the `memory:index-documents --fresh` backfill command, but individual embedding calls use `embed()`. This aligns with upstream's rationale: batch is mainly beneficial for large backfills; sync is adequate for incremental updates. - ---- - -## Phase 2: Document Embeddings — Index, Observe, Search - -### Goal -Automatically chunk and embed workspace documents so the `search_documents` tool can do semantic search in addition to keyword search. - -### Depends On -Phase 1 (ChunkingService, EmbeddingService, DocumentChunk model) - -### Services - -**`app/Services/Memory/DocumentIndexingService.php`** - -Orchestrates chunking + embedding + storage for a Document. 
- -```php -class DocumentIndexingService -{ - public function __construct( - private ChunkingService $chunker, - private EmbeddingService $embedder, - ) {} - - /** - * Index a document: chunk its content, embed, and store as DocumentChunks. - */ - public function index(Document $document, string $collection = 'general', ?string $agentId = null): void - { - // 1. Delete existing chunks for this document - DocumentChunk::where('document_id', $document->id)->delete(); - - if (empty(trim($document->content))) { - return; - } - - // 2. Chunk - $chunks = $this->chunker->chunk($document->content); - - // 3. Embed all chunks in batch - $embeddings = $this->embedder->embedBatch($chunks); - - // 4. Store - foreach ($chunks as $i => $chunkText) { - DocumentChunk::create([ - 'document_id' => $document->id, - 'content' => $chunkText, - 'content_hash' => hash('sha256', $chunkText), - 'embedding' => $embeddings[$i], - 'collection' => $collection, - 'agent_id' => $agentId, - 'chunk_index' => $i, - 'metadata' => [ - 'title' => $document->title, - 'updated_at' => $document->updated_at?->toIso8601String(), - ], - ]); - } - } - - /** - * Remove all chunks for a document. - */ - public function deindex(Document $document): void - { - DocumentChunk::where('document_id', $document->id)->delete(); - } - - /** - * Semantic search across document chunks. - * - * @return Collection Ordered by similarity (highest first) - */ - public function search( - string $query, - string $collection = 'general', - ?string $agentId = null, - int $limit = 10, - float $minSimilarity = 0.5, - ): Collection { - $queryEmbedding = $this->embedder->embed($query); - $vectorString = '[' . implode(',', $queryEmbedding) . ']'; - - $builder = DocumentChunk::query() - ->where('collection', $collection) - ->selectRaw('*, 1 - (embedding <=> ?) 
as similarity', [$vectorString]) - ->having('similarity', '>=', $minSimilarity) - ->orderByDesc('similarity') - ->limit($limit); - - if ($agentId !== null) { - $builder->where('agent_id', $agentId); - } else { - $builder->whereNull('agent_id'); - } - - return $builder->get(); - } -} -``` - -### Observer - -**`app/Observers/DocumentObserver.php`** - -```php -class DocumentObserver -{ - /** - * Auto-index when a non-folder document is saved/updated. - */ - public function saved(Document $document): void - { - if ($document->is_folder) { - return; - } - - // Determine collection based on document location - $collection = $this->resolveCollection($document); - $agentId = $this->resolveAgentId($document); - - IndexDocumentJob::dispatch($document, $collection, $agentId); - } - - public function deleted(Document $document): void - { - DocumentChunk::where('document_id', $document->id)->delete(); - } - - /** - * Resolve collection type based on document's folder hierarchy. - * - agents/{slug}/memory/* → 'memory' - * - agents/{slug}/identity/* → 'identity' - * - everything else → 'general' - */ - private function resolveCollection(Document $document): string - { - // Walk up the parent chain to detect if inside agents/*/memory or agents/*/identity - $parent = $document->parent; - while ($parent) { - if ($parent->title === 'memory' && $parent->is_folder) { - return 'memory'; - } - if ($parent->title === 'identity' && $parent->is_folder) { - return 'identity'; - } - $parent = $parent->parent; - } - return 'general'; - } - - /** - * Resolve agent owner if this document lives under agents/{slug}/. 
- */ - private function resolveAgentId(Document $document): ?string - { - // Walk up the parent chain to find the agent folder - $parent = $document->parent; - while ($parent) { - if ($parent->parent?->title === 'agents') { - // This is the agent folder — find the agent by slug - $agent = User::where('is_agent', true) - ->whereRaw("LOWER(REPLACE(name, ' ', '-')) = ?", [strtolower($parent->title)]) - ->first(); - return $agent?->id; - } - $parent = $parent->parent; - } - return null; - } -} -``` - -Register in `AppServiceProvider::boot()`: -```php -Document::observe(DocumentObserver::class); -``` - -### Job - -**`app/Jobs/IndexDocumentJob.php`** - -```php -class IndexDocumentJob implements ShouldQueue -{ - use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; - - public int $tries = 3; - public array $backoff = [10, 30]; - - public function __construct( - private Document $document, - private string $collection = 'general', - private ?string $agentId = null, - ) {} - - public function handle(DocumentIndexingService $indexer): void - { - $indexer->index($this->document, $this->collection, $this->agentId); - } -} -``` - -### SearchDocuments Tool Update - -Add a `mode` parameter to the existing `SearchDocuments` tool (`app/Agents/Tools/Docs/SearchDocuments.php`): - -```php -public function schema(JsonSchema $schema): array -{ - return [ - 'query' => $schema->string()->description('The search query.')->required(), - 'mode' => $schema->string()->description( - 'Search mode: "keyword" (ILIKE), "semantic" (vector similarity), or "auto" (semantic first, keyword fallback). Default: auto.' - ), - 'limit' => $schema->integer()->description('Maximum number of results. Default: 5.'), - ]; -} -``` - -In `handle()`, when `mode` is `semantic` or `auto`: -1. Call `DocumentIndexingService::search($query, 'general', null, $limit)` -2. Map results to the same output format (title, snippet, ID) -3. 
If `auto` and semantic returns < 2 results, fall back to keyword search and merge - -### Artisan Command - -**`app/Console/Commands/MemoryIndexDocuments.php`** - -```bash -php artisan memory:index-documents [--fresh] -``` - -- Iterates all non-folder documents -- Dispatches `IndexDocumentJob` for each -- `--fresh` flag deletes all existing chunks first -- Shows progress bar - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `DocumentIndexingServiceTest` | Index creates chunks, deindex removes them, search returns ranked results | -| `DocumentObserverTest` | Save triggers index job, delete removes chunks, folders are skipped | -| `SearchDocumentsSemanticTest` | Semantic mode returns relevant results, auto fallback works | -| `IndexDocumentJobTest` | Job calls indexer with correct parameters | - -### Files to Create/Modify - -``` -app/Services/Memory/DocumentIndexingService.php (new) -app/Observers/DocumentObserver.php (new) -app/Jobs/IndexDocumentJob.php (new) -app/Console/Commands/MemoryIndexDocuments.php (new) -app/Agents/Tools/Docs/SearchDocuments.php (modify — add mode param) -app/Providers/AppServiceProvider.php (modify — register observer) -tests/Feature/Services/Memory/DocumentIndexingServiceTest.php -tests/Feature/Observers/DocumentObserverTest.php -``` - ---- - -## Phase 3: Long-Term Memory (LTM) — Save & Recall Tools - -### Goal -Give agents a **long-term memory** system: `save_memory` to persist durable memories that survive across conversations, and `recall_memory` to semantically search them on demand. Unlike short-term memory (the conversation context), LTM is permanent and must be explicitly saved and recalled. 
- -### Depends On -Phase 1 (ChunkingService, EmbeddingService), Phase 2 (DocumentIndexingService) - -### LTM File Layout (OpenClaw Pattern) - -Agents have two LTM storage layers, mirroring OpenClaw's approach: - -| File | Purpose | Loaded | Write pattern | -|------|---------|--------|---------------| -| `agents/{slug}/identity/MEMORY.md` | **Curated** long-term memory — high-value preferences, decisions, key facts | Always (part of system prompt via identity files) | Overwrite/update (curated) | -| `agents/{slug}/memory/YYYY-MM-DD.md` | **Daily logs** — running context, timestamped entries | Not auto-loaded; searchable via `recall_memory` | Append-only | - -The key difference: `MEMORY.md` is always in the agent's system prompt (already loaded by `OpenCompanyAgent::instructions()`), so its contents are "always remembered." Daily logs must be actively recalled via semantic search. - -The `save_memory` tool writes to daily logs by default (`target: "log"`). For high-value curated info, the agent can write to `MEMORY.md` (`target: "core"`), which updates the persistent identity file. - -### Tools - -**`app/Agents/Tools/Memory/SaveMemory.php`** - -```php -class SaveMemory implements Tool -{ - public function __construct( - private User $agent, - private AgentDocumentService $docService, - private DocumentIndexingService $indexer, - ) {} - - public function description(): string - { - return 'Save a durable memory that persists across conversations. Use target "core" for high-value facts that should always be remembered (written to MEMORY.md), or "log" for timestamped daily entries (searchable via recall_memory).'; - } - - public function handle(Request $request): string - { - $content = $request['content']; - $category = $request['category'] ?? 'general'; - $target = $request['target'] ?? 
'log'; - - if ($target === 'core') { - // Write to MEMORY.md (curated, always in system prompt) - $memoryFile = $this->docService->getIdentityFile($this->agent, 'MEMORY'); - if (!$memoryFile) { - return 'Error: MEMORY.md not found for this agent.'; - } - - // Append to MEMORY.md under the appropriate section - $newContent = $memoryFile->content . "\n\n### {$category}\n{$content}"; - $this->docService->updateIdentityFile($this->agent, 'MEMORY', $newContent); - - // Index for semantic search - $this->indexer->index($memoryFile->fresh(), 'identity', $this->agent->id); - - return "Core memory saved to MEMORY.md (always loaded in system prompt)."; - } - - // Default: write to daily log - $entry = "### [{$category}] " . now()->format('H:i') . "\n\n{$content}"; - $doc = $this->docService->createMemoryLog($this->agent, $entry); - - if (!$doc) { - return 'Error: Could not save memory. Agent document structure may not be initialized.'; - } - - // Index the memory for semantic recall - $this->indexer->index($doc, 'memory', $this->agent->id); - - return "Memory saved to {$doc->title} (recallable via recall_memory)."; - } - - public function schema(JsonSchema $schema): array - { - return [ - 'content' => $schema->string() - ->description('The memory content to save. Be specific and include context.') - ->required(), - 'category' => $schema->string() - ->description('Category tag: "preference", "decision", "learning", "fact", or "general". Default: general.'), - 'target' => $schema->string() - ->description('Where to save: "core" writes to MEMORY.md (always loaded in your system prompt — use for high-value durable facts), "log" appends to daily log (searchable via recall_memory — use for running context). 
Default: log.'), - ]; - } -} -``` - -**`app/Agents/Tools/Memory/RecallMemory.php`** - -```php -class RecallMemory implements Tool -{ - public function __construct( - private User $agent, - private DocumentIndexingService $indexer, - ) {} - - public function description(): string - { - return 'Search your long-term memory for relevant past information, decisions, and learnings.'; - } - - public function handle(Request $request): string - { - $query = $request['query']; - $limit = $request['limit'] ?? 6; - - $results = $this->indexer->search( - query: $query, - collection: 'memory', - agentId: $this->agent->id, - limit: $limit, - minSimilarity: config('memory.search.min_similarity', 0.5), - ); - - if ($results->isEmpty()) { - return "No memories found matching '{$query}'."; - } - - // Apply result clamping: max snippet chars = 700, max total injected = 4000 - $maxSnippet = 700; - $maxTotal = 4000; - $totalChars = 0; - $output = []; - - foreach ($results as $chunk) { - $snippet = Str::limit($chunk->content, $maxSnippet); - $date = $chunk->metadata['updated_at'] ?? 'unknown date'; - $similarity = round($chunk->similarity * 100); - $entry = "**{$date}** ({$similarity}% match)\n{$snippet}"; - - if ($totalChars + strlen($entry) > $maxTotal) { - break; - } - $output[] = $entry; - $totalChars += strlen($entry); - } - - return "Found " . count($output) . " memory/memories:\n\n" . implode("\n\n---\n\n", $output); - } - - public function schema(JsonSchema $schema): array - { - return [ - 'query' => $schema->string() - ->description('What to search for in your memories.') - ->required(), - 'limit' => $schema->integer() - ->description('Maximum number of memories to return. Default: 6.'), - ]; - } -} -``` - -### ToolRegistry Updates - -In `app/Agents/Tools/ToolRegistry.php`: - -1. Add to `APP_GROUPS`: -```php -'memory' => [ - 'tools' => ['save_memory', 'recall_memory'], - 'label' => 'save, recall', - 'description' => 'Long-term agent memory', -], -``` - -2. 
Add to `APP_ICONS`: -```php -'memory' => 'ph:brain', -``` - -3. Add to `TOOL_MAP`: -```php -'save_memory' => [ - 'class' => SaveMemory::class, - 'type' => 'write', - 'name' => 'Save Memory', - 'description' => 'Save a durable memory that persists across conversations.', - 'icon' => 'ph:brain', -], -'recall_memory' => [ - 'class' => RecallMemory::class, - 'type' => 'read', - 'name' => 'Recall Memory', - 'description' => 'Search long-term memory for past information and learnings.', - 'icon' => 'ph:brain', -], -``` - -4. Add to `instantiateTool()` match block: -```php -SaveMemory::class => new SaveMemory($agent, app(AgentDocumentService::class), app(DocumentIndexingService::class)), -RecallMemory::class => new RecallMemory($agent, app(DocumentIndexingService::class)), -``` - -5. Add `'memory'` to the `$displayOrder` in `getAppCatalog()` (after `'agents'`): -```php -['agents', 'memory', 'chat', 'docs', 'tables', 'calendar', 'lists', 'workspace', null], -``` - -### System Prompt Update - -Add memory usage guidance to `OpenCompanyAgent::instructions()`. Append to the system prompt after the MEMORY.md identity file section. See **Appendix B** for the complete system prompt text. - -The system prompt covers: -1. STM vs LTM mental model (what the agent controls vs what's automatic) -2. `save_memory` with `target` ("core" vs "log") and clear guidance on when to use each -3. `recall_memory` with explicit triggers (prior work questions, complex tasks, referenced conversations) -4. A save/don't-save decision guide -5. Clear instruction: "If someone says remember this — save it immediately" - -Key design choice (differs from OpenClaw): Our system prompt is **proactive about saving**, not just recall. OpenClaw's system prompt only mentions recall (`"Before answering anything about prior work... run memory_search"`), relying on docs and flush prompts for save behavior. 
Our agents don't have filesystem access and can't read docs on demand, so the system prompt must be self-contained. - -### Backfill Command - -**`app/Console/Commands/MemoryIndexLogs.php`** - -```bash -php artisan memory:index-logs [--agent=slug] -``` - -- Finds all memory log documents under `agents/*/memory/` -- Dispatches `IndexDocumentJob` for each with `collection='memory'` and the correct `agent_id` -- Optional `--agent` flag to limit to a specific agent - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `SaveMemoryTest` | Persists to daily log, indexes for recall, handles missing doc structure | -| `RecallMemoryTest` | Returns ranked results, respects agent scoping, applies result clamping | -| `MemoryIndexLogsCommandTest` | Backfill command finds logs and dispatches jobs | - -### Files to Create/Modify - -``` -app/Agents/Tools/Memory/SaveMemory.php (new) -app/Agents/Tools/Memory/RecallMemory.php (new) -app/Agents/Tools/ToolRegistry.php (modify — register tools) -app/Agents/OpenCompanyAgent.php (modify — add memory guidance) -app/Console/Commands/MemoryIndexLogs.php (new) -tests/Feature/Tools/SaveMemoryTest.php -tests/Feature/Tools/RecallMemoryTest.php -``` - ---- - -## Phase 4: Short-Term Memory (STM) — Conversation Compaction - -### Goal -Manage the agent's **short-term memory** (conversation context window) by summarizing older messages when conversations get long. Without compaction, STM simply drops older messages — compaction preserves them as compressed summaries. Summaries are cumulative --- each compaction builds on the previous summary. 
- -### Depends On -Phase 1 (foundation only --- compaction doesn't require embeddings) - -### Database - -**Migration: `create_conversation_summaries_table`** - -```php -Schema::create('conversation_summaries', function (Blueprint $table) { - $table->uuid('id')->primary(); - $table->string('channel_id'); - $table->string('agent_id'); - $table->longText('summary'); // The cumulative summary text - $table->integer('tokens_before')->default(0); // Token count before compaction - $table->integer('tokens_after')->default(0); // Token count after compaction - $table->integer('compaction_count')->default(0); // How many compaction cycles - $table->integer('messages_summarized')->default(0); // Total messages folded in - $table->string('last_message_id')->nullable(); // Last message included in summary - $table->integer('flush_count')->default(0); // Pre-compaction flushes done (for Phase 5) - $table->timestamps(); - - $table->foreign('channel_id')->references('id')->on('channels')->cascadeOnDelete(); - $table->foreign('agent_id')->references('id')->on('users')->cascadeOnDelete(); - - $table->unique(['channel_id', 'agent_id']); -}); -``` - -### Config Updates - -Add `compaction` section to `config/memory.php`: - -```php -'compaction' => [ - 'enabled' => env('MEMORY_COMPACTION_ENABLED', true), - 'threshold_ratio' => 0.75, // Compact when at 75% of context window - 'keep_ratio' => 0.4, // Keep the most recent 40% of messages - 'context_window' => (int) env('MEMORY_CONTEXT_WINDOW', 128000), // Default token budget - 'summary_model' => env('MEMORY_SUMMARY_MODEL', 'claude-sonnet-4-5-20250929'), - 'summary_max_tokens' => 2000, -], -``` - -> **Design note (v2026.2.9):** OpenClaw added `memory.qmd.update.waitForBootSync` (default `false`) to control whether QMD initialization blocks gateway startup. This is not needed in our architecture — pgvector indexes are always available via PostgreSQL with no cold-start model download or index warm-up step. 
- -### Model - -**`app/Models/ConversationSummary.php`** - -```php -class ConversationSummary extends Model -{ - use HasUuids; - - protected $fillable = [ - 'id', 'channel_id', 'agent_id', 'summary', - 'tokens_before', 'tokens_after', 'compaction_count', - 'messages_summarized', 'last_message_id', 'flush_count', - ]; - - public function channel(): BelongsTo - { - return $this->belongsTo(Channel::class); - } - - public function agent(): BelongsTo - { - return $this->belongsTo(User::class, 'agent_id'); - } -} -``` - -### Service - -**`app/Services/Memory/ConversationCompactionService.php`** - -```php -class ConversationCompactionService -{ - /** - * Check if compaction is needed for a channel/agent pair. - */ - public function needsCompaction(string $channelId, User $agent, iterable $messages): bool - { - if (!config('memory.compaction.enabled', true)) { - return false; - } - - $totalTokens = $this->estimateTokens($messages); - $threshold = config('memory.compaction.context_window', 128000) - * config('memory.compaction.threshold_ratio', 0.75); - - return $totalTokens > $threshold; - } - - /** - * Perform compaction: summarize older messages, return the summary. - */ - public function compact(string $channelId, User $agent, array $messages): ConversationSummary - { - $keepRatio = config('memory.compaction.keep_ratio', 0.4); - $keepCount = max(3, (int) ceil(count($messages) * $keepRatio)); - $splitIndex = count($messages) - $keepCount; - - // Split: older messages to summarize, recent messages to keep - $toSummarize = array_slice($messages, 0, $splitIndex); - - // Get existing summary (cumulative) - $existing = ConversationSummary::where('channel_id', $channelId) - ->where('agent_id', $agent->id) - ->first(); - - // Build summarization prompt - $previousSummary = $existing?->summary ?? 
''; - $summaryText = $this->summarize($toSummarize, $previousSummary); - - $tokensBefore = $this->estimateTokens($messages); - - // Upsert summary - $summary = ConversationSummary::updateOrCreate( - ['channel_id' => $channelId, 'agent_id' => $agent->id], - [ - 'summary' => $summaryText, - 'tokens_before' => $tokensBefore, - 'tokens_after' => $this->estimateTokenCount($summaryText), - 'compaction_count' => ($existing?->compaction_count ?? 0) + 1, - 'messages_summarized' => ($existing?->messages_summarized ?? 0) + count($toSummarize), - 'last_message_id' => end($toSummarize)?->id ?? $existing?->last_message_id, - ] - ); - - return $summary; - } - - /** - * Summarize messages using an LLM call. - */ - private function summarize(array $messages, string $previousSummary): string - { - $prompt = "You are summarizing a conversation for an AI agent's context window.\n\n"; - - if ($previousSummary) { - $prompt .= "Previous summary of even older messages:\n{$previousSummary}\n\n"; - } - - $prompt .= "Messages to summarize:\n"; - foreach ($messages as $msg) { - $role = $msg->role->value ?? 'unknown'; - $content = $msg->content ?? ''; - $prompt .= "[{$role}]: {$content}\n"; - } - - $prompt .= "\nCreate a concise summary that captures:\n"; - $prompt .= "- Key topics discussed\n- Decisions made\n- Action items\n- Important context\n"; - $prompt .= "- User preferences expressed\n\n"; - $prompt .= "Be factual and specific. Preserve names, dates, and technical details."; - - // Call LLM for summarization - // Use the configured summary model - $model = config('memory.compaction.summary_model'); - $maxTokens = config('memory.compaction.summary_max_tokens', 2000); - - // Use Laravel AI SDK to generate summary - // Return the summary text - } - - private function estimateTokens(iterable $messages): int - { - $total = 0; - foreach ($messages as $msg) { - $content = $msg->content ?? 
''; - $total += (int) ceil(str_word_count($content) * 1.3); - } - return $total; - } - - private function estimateTokenCount(string $text): int - { - return (int) ceil(str_word_count($text) * 1.3); - } -} -``` - -### ChannelConversationLoader Update - -Modify `app/Agents/Conversations/ChannelConversationLoader.php` to: - -1. **Prepend existing summary** as the first message if one exists -2. **Only load messages after** the last summarized message -3. **Dispatch compaction job** when approaching the threshold - -```php -class ChannelConversationLoader -{ - private const DEFAULT_LIMIT = 50; // Increase from 20 to load more for compaction context - - public function __construct( - private ConversationCompactionService $compactionService, - ) {} - - public function load(string $channelId, User $agent, int $limit = self::DEFAULT_LIMIT): iterable - { - $sdkMessages = []; - - // 1. Check for existing summary - $summary = ConversationSummary::where('channel_id', $channelId) - ->where('agent_id', $agent->id) - ->first(); - - if ($summary && !empty($summary->summary)) { - // Prepend summary as a system-style user message - $sdkMessages[] = new UserMessage( - "[Conversation Summary — {$summary->messages_summarized} prior messages, " - . "{$summary->compaction_count} compaction(s)]\n\n{$summary->summary}" - ); - } - - // 2. Load messages (after last summarized message if applicable) - $query = Message::where('channel_id', $channelId) - ->orderBy('created_at', 'desc') - ->take($limit); - - if ($summary?->last_message_id) { - $lastMsg = Message::find($summary->last_message_id); - if ($lastMsg) { - $query->where('created_at', '>', $lastMsg->created_at); - } - } - - $messages = $query->get()->reverse()->values(); - - foreach ($messages as $message) { - if (empty($message->content)) { - continue; - } - - if ($message->author_id === $agent->id) { - $sdkMessages[] = new AssistantMessage($message->content); - } else { - $author = $message->author; - $authorName = $author->name ?? 
'User'; - $sdkMessages[] = new UserMessage("[{$authorName}]: {$message->content}"); - } - } - - // 3. Check if compaction is needed (dispatch async) - if ($this->compactionService->needsCompaction($channelId, $agent, $sdkMessages)) { - CompactConversationJob::dispatch($channelId, $agent); - } - - return $sdkMessages; - } -} -``` - -### Job - -**`app/Jobs/CompactConversationJob.php`** - -```php -class CompactConversationJob implements ShouldQueue -{ - use Dispatchable, InteractsWithQueue, Queueable, SerializesModels; - - public int $tries = 2; - public int $timeout = 120; - - public function __construct( - private string $channelId, - private User $agent, - ) {} - - public function handle(ConversationCompactionService $compactor): void - { - // Load all raw messages for this channel - $messages = Message::where('channel_id', $this->channelId) - ->orderBy('created_at', 'asc') - ->get(); - - // Convert to SDK message format for the compactor - $sdkMessages = []; - foreach ($messages as $msg) { - if (empty($msg->content)) continue; - if ($msg->author_id === $this->agent->id) { - $sdkMessages[] = new AssistantMessage($msg->content); - } else { - $sdkMessages[] = new UserMessage($msg->content); - } - } - - $compactor->compact($this->channelId, $this->agent, $sdkMessages); - } -} -``` - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `ConversationCompactionServiceTest` | Threshold detection, message splitting at keep_ratio, summary generation, cumulative summaries | -| `ChannelConversationLoaderTest` | Summary prepended, only post-summary messages loaded, compaction job dispatched | -| `CompactConversationJobTest` | Job loads messages, calls compactor, stores summary | - -### Files to Create/Modify - -``` -database/migrations/YYYY_MM_DD_000003_create_conversation_summaries_table.php -app/Models/ConversationSummary.php (new) -app/Services/Memory/ConversationCompactionService.php (new) -app/Jobs/CompactConversationJob.php (new) 
-app/Agents/Conversations/ChannelConversationLoader.php (modify) -config/memory.php (modify — add compaction section) -tests/Feature/Services/Memory/ConversationCompactionServiceTest.php -tests/Feature/Agents/Conversations/ChannelConversationLoaderTest.php -``` - ---- - -## Phase 5: STM → LTM Promotion — Pre-Compaction Memory Flush - -### Goal -**Bridge short-term and long-term memory.** Before compaction summarizes (and lossy-compresses) older messages from STM, give the agent a silent turn to promote important information to LTM via `save_memory`. This ensures key facts, preferences, and decisions survive compaction intact rather than being compressed into a summary. - -### Depends On -Phase 3 (SaveMemory tool), Phase 4 (ConversationCompactionService) - -### Config Updates - -Add `memory_flush` section to `config/memory.php`: - -```php -'memory_flush' => [ - 'enabled' => env('MEMORY_FLUSH_ENABLED', true), - 'soft_threshold_tokens' => 4000, // Trigger flush this many tokens before compaction threshold - 'max_flushes_per_cycle' => 1, // Prevent repeated flushes per compaction cycle -], -``` - -### Service - -**`app/Services/Memory/MemoryFlushService.php`** - -```php -class MemoryFlushService -{ - public function __construct( - private ConversationCompactionService $compactionService, - ) {} - - /** - * Check if a memory flush should be triggered. - * Flush happens when we're within soft_threshold_tokens of the compaction threshold, - * AND we haven't already flushed for this compaction cycle. 
- */ - public function shouldFlush(string $channelId, User $agent, iterable $messages): bool - { - if (!config('memory.memory_flush.enabled', true)) { - return false; - } - - $summary = ConversationSummary::where('channel_id', $channelId) - ->where('agent_id', $agent->id) - ->first(); - - $maxFlushes = config('memory.memory_flush.max_flushes_per_cycle', 1); - if ($summary && $summary->flush_count >= $maxFlushes) { - return false; - } - - $totalTokens = $this->estimateTokens($messages); - $compactionThreshold = config('memory.compaction.context_window', 128000) - * config('memory.compaction.threshold_ratio', 0.75); - $softThreshold = $compactionThreshold - config('memory.memory_flush.soft_threshold_tokens', 4000); - - return $totalTokens > $softThreshold && $totalTokens < $compactionThreshold; - } - - /** - * Execute a silent memory flush: run the agent with a flush prompt - * that instructs it to save important memories. - */ - public function flush(string $channelId, User $agent): void - { - $flushPrompt = $this->buildFlushPrompt(); - - // Create a transient agent instance with the flush prompt - $agentInstance = OpenCompanyAgent::for($agent, $channelId); - - // Run a silent prompt — the agent should use save_memory tool calls - // but the text response is discarded (not posted to the channel) - $response = $agentInstance->prompt($flushPrompt); - - // Increment flush count to prevent re-flushing - ConversationSummary::where('channel_id', $channelId) - ->where('agent_id', $agent->id) - ->increment('flush_count'); - - Log::info('Memory flush completed', [ - 'agent' => $agent->name, - 'channel' => $channelId, - 'tool_calls' => $response->toolCalls->count(), - ]); - } - - private function buildFlushPrompt(): string - { - return <<<'PROMPT' - Pre-compaction memory flush. Your conversation context is about to be compacted - (older messages will be summarized and compressed). - - Review the conversation for durable context worth preserving. 
Use save_memory - (target: "log") to save important observations, decisions, preferences, or - learnings to your daily log before they are compressed. - - Only save to target: "core" if you discovered truly high-value permanent facts - (user preferences, key decisions) that should always be in your system prompt. - - If nothing needs saving, respond with exactly: [FLUSH_COMPLETE] - PROMPT; - } - - private function estimateTokens(iterable $messages): int - { - $total = 0; - foreach ($messages as $msg) { - $content = $msg->content ?? ''; - $total += (int) ceil(str_word_count($content) * 1.3); - } - return $total; - } -} -``` - -### AgentRespondJob Integration - -Hook into `app/Jobs/AgentRespondJob.php` before the `prompt()` call (line 136): - -```php -// === Memory flush check (before prompting) === -try { - $flushService = app(MemoryFlushService::class); - $currentMessages = $agentInstance->messages(); - if ($flushService->shouldFlush($this->channelId, $this->agent, $currentMessages)) { - $flushStep = $task->addStep('Flushing memories before compaction', 'action'); - $flushStep->start(); - $flushService->flush($this->channelId, $this->agent); - $flushStep->complete(); - } -} catch (\Throwable $e) { - Log::warning('Memory flush failed', ['error' => $e->getMessage()]); -} -// === End memory flush === - -$response = $agentInstance->prompt($this->userMessage->content); -``` - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `MemoryFlushServiceTest` | Detects soft threshold, respects max_flushes_per_cycle, builds correct prompt | -| `AgentRespondJobFlushTest` | Flush triggered before prompt when threshold met, not triggered when below threshold | - -### Files to Create/Modify - -``` -app/Services/Memory/MemoryFlushService.php (new) -app/Jobs/AgentRespondJob.php (modify — add flush hook) -config/memory.php (modify — add memory_flush section) -tests/Feature/Services/Memory/MemoryFlushServiceTest.php -tests/Feature/Jobs/AgentRespondJobFlushTest.php 
-``` - ---- - -## Phase 6: Hybrid Search (BM25 + Vector) - -### Goal -Combine vector similarity search with PostgreSQL full-text search (BM25-equivalent) for more robust retrieval. Some queries work better with exact keyword matching, others with semantic understanding --- hybrid search gives the best of both. - -### Depends On -Phase 1 (DocumentChunk model), Phase 2 (DocumentIndexingService), Phase 3 (RecallMemory tool) - -### Database - -**Migration: `add_search_vector_to_document_chunks`** - -```php -// Add tsvector column with GIN index -Schema::table('document_chunks', function (Blueprint $table) { - $table->addColumn('tsvector', 'search_vector')->nullable(); -}); - -// Create GIN index for fast full-text search -DB::statement('CREATE INDEX document_chunks_search_vector_idx ON document_chunks USING GIN (search_vector)'); - -// Create trigger to auto-populate search_vector on insert/update -DB::statement(" - CREATE OR REPLACE FUNCTION document_chunks_search_vector_update() RETURNS trigger AS $$ - BEGIN - NEW.search_vector := to_tsvector('english', COALESCE(NEW.content, '')); - RETURN NEW; - END - $$ LANGUAGE plpgsql; -"); - -DB::statement(" - CREATE TRIGGER document_chunks_search_vector_trigger - BEFORE INSERT OR UPDATE OF content ON document_chunks - FOR EACH ROW EXECUTE FUNCTION document_chunks_search_vector_update(); -"); - -// Backfill existing rows -DB::statement("UPDATE document_chunks SET search_vector = to_tsvector('english', COALESCE(content, ''))"); -``` - -### Service - -**`app/Services/Memory/HybridSearchService.php`** - -```php -class HybridSearchService -{ - public function __construct( - private EmbeddingService $embedder, - ) {} - - /** - * Hybrid search: combine vector similarity with full-text search. 
- * - * @return Collection Ranked results with combined scores - */ - public function search( - string $query, - string $collection = 'general', - ?string $agentId = null, - int $limit = 6, - float $minSimilarity = 0.5, - ): Collection { - $semanticWeight = config('memory.search.hybrid_weights.semantic', 0.7); - $keywordWeight = config('memory.search.hybrid_weights.keyword', 0.3); - $maxSnippetChars = 700; - $maxInjectedChars = 4000; - - // 1. Vector search - $vectorResults = $this->vectorSearch($query, $collection, $agentId, $limit * 2, $minSimilarity); - - // 2. Full-text search - $ftsResults = $this->ftsSearch($query, $collection, $agentId, $limit * 2); - - // 3. Merge by chunk ID with weighted scores - $merged = $this->mergeResults($vectorResults, $ftsResults, $semanticWeight, $keywordWeight); - - // 4. Sort by combined score, apply limits - $ranked = $merged->sortByDesc('score')->take($limit); - - // 5. Apply result clamping - $totalChars = 0; - $clamped = $ranked->filter(function ($result) use ($maxSnippetChars, $maxInjectedChars, &$totalChars) { - $snippet = Str::limit($result['content'], $maxSnippetChars); - if ($totalChars + strlen($snippet) > $maxInjectedChars) { - return false; - } - $totalChars += strlen($snippet); - return true; - }); - - return $clamped->values(); - } - - private function vectorSearch( - string $query, - string $collection, - ?string $agentId, - int $limit, - float $minSimilarity, - ): Collection { - $queryEmbedding = $this->embedder->embed($query); - $vectorString = '[' . implode(',', $queryEmbedding) . ']'; - - $builder = DocumentChunk::query() - ->where('collection', $collection) - ->selectRaw('id, document_id, content, metadata, 1 - (embedding <=> ?) 
as vector_score', [$vectorString]) - ->having('vector_score', '>=', $minSimilarity) - ->orderByDesc('vector_score') - ->limit($limit); - - if ($agentId !== null) { - $builder->where('agent_id', $agentId); - } else { - $builder->whereNull('agent_id'); - } - - return $builder->get(); - } - - private function ftsSearch( - string $query, - string $collection, - ?string $agentId, - int $limit, - ): Collection { - $tsQuery = $this->buildTsQuery($query); - - $builder = DocumentChunk::query() - ->where('collection', $collection) - ->whereRaw('search_vector @@ to_tsquery(\'english\', ?)', [$tsQuery]) - ->selectRaw( - 'id, document_id, content, metadata, ts_rank(search_vector, to_tsquery(\'english\', ?)) as fts_score', - [$tsQuery] - ) - ->orderByDesc('fts_score') - ->limit($limit); - - if ($agentId !== null) { - $builder->where('agent_id', $agentId); - } else { - $builder->whereNull('agent_id'); - } - - return $builder->get(); - } - - /** - * Merge vector and FTS results by chunk ID with weighted scoring. - * Normalizes scores to [0, 1] range before combining. 
- */ - private function mergeResults( - Collection $vectorResults, - Collection $ftsResults, - float $semanticWeight, - float $keywordWeight, - ): Collection { - $merged = collect(); - - // Normalize vector scores - $maxVector = $vectorResults->max('vector_score') ?: 1; - $vectorNormalized = $vectorResults->keyBy('id')->map(fn ($r) => [ - 'id' => $r->id, - 'document_id' => $r->document_id, - 'content' => $r->content, - 'metadata' => $r->metadata, - 'vector_score' => $r->vector_score / $maxVector, - 'fts_score' => 0, - ]); - - // Normalize FTS scores - $maxFts = $ftsResults->max('fts_score') ?: 1; - foreach ($ftsResults as $result) { - $normalizedFts = $result->fts_score / $maxFts; - - if ($vectorNormalized->has($result->id)) { - // Merge: chunk appears in both results - $existing = $vectorNormalized->get($result->id); - $existing['fts_score'] = $normalizedFts; - $vectorNormalized->put($result->id, $existing); - } else { - $vectorNormalized->put($result->id, [ - 'id' => $result->id, - 'document_id' => $result->document_id, - 'content' => $result->content, - 'metadata' => $result->metadata, - 'vector_score' => 0, - 'fts_score' => $normalizedFts, - ]); - } - } - - // Calculate combined scores - return $vectorNormalized->map(function ($r) use ($semanticWeight, $keywordWeight) { - $r['score'] = ($r['vector_score'] * $semanticWeight) + ($r['fts_score'] * $keywordWeight); - return $r; - })->values(); - } - - /** - * Convert a natural language query to a PostgreSQL tsquery. - * Splits on spaces, joins with & (AND). - */ - private function buildTsQuery(string $query): string - { - $words = array_filter(explode(' ', trim($query)), fn ($w) => strlen($w) > 1); - return implode(' & ', array_map(fn ($w) => preg_replace('/[^a-zA-Z0-9]/', '', $w), $words)); - } -} -``` - -### Integration - -1. 
**Replace vector-only search in `DocumentIndexingService::search()`** with a call to `HybridSearchService::search()`, or make `DocumentIndexingService` delegate to `HybridSearchService` internally. - -2. **Update `RecallMemory` tool** to use `HybridSearchService` instead of `DocumentIndexingService::search()`: - -```php -// Before (Phase 3): -$results = $this->indexer->search($query, 'memory', $this->agent->id, $limit); - -// After (Phase 6): -$results = $this->hybridSearch->search($query, 'memory', $this->agent->id, $limit); -``` - -3. **Update `SearchDocuments` tool** semantic mode to use `HybridSearchService`. - -### Tests - -| Test | What it verifies | -|------|-----------------| -| `HybridSearchServiceTest` | Vector-only results, FTS-only results, merged results with correct weighting | -| `ScoreNormalizationTest` | Scores normalized to [0,1], combined scores correct | -| `ResultClampingTest` | maxResults, maxSnippetChars, maxInjectedChars all enforced | -| `TsQueryBuildingTest` | Natural language converted to valid tsquery | -| Migration test | tsvector column created, GIN index exists, trigger fires on insert/update | - -### Files to Create/Modify - -``` -database/migrations/YYYY_MM_DD_000004_add_search_vector_to_document_chunks.php -app/Services/Memory/HybridSearchService.php (new) -app/Services/Memory/DocumentIndexingService.php (modify — delegate to hybrid) -app/Agents/Tools/Memory/RecallMemory.php (modify — use hybrid search) -app/Agents/Tools/Docs/SearchDocuments.php (modify — use hybrid in semantic mode) -tests/Feature/Services/Memory/HybridSearchServiceTest.php -``` - ---- - -## Summary - -| Phase | Memory Type | What | Key Files | Depends On | -|-------|-------------|------|-----------|------------| -| 1 | Infrastructure | pgvector + ChunkingService + EmbeddingService | `Services/Memory/Chunking*.php`, `Services/Memory/Embedding*.php` | --- | -| 2 | Knowledge Base | Document indexing + observer + semantic search | 
`Services/Memory/DocumentIndexing*.php`, `Observers/DocumentObserver.php` | Phase 1 | -| 3 | **LTM** | SaveMemory + RecallMemory agent tools | `Tools/Memory/SaveMemory.php`, `Tools/Memory/RecallMemory.php` | Phase 1, 2 | -| 4 | **STM** | Conversation compaction + summaries | `Services/Memory/ConversationCompaction*.php`, `ChannelConversationLoader.php` | Phase 1 | -| 5 | **STM → LTM** | Pre-compaction memory flush | `Services/Memory/MemoryFlushService.php`, `AgentRespondJob.php` | Phase 3, 4 | -| 6 | Infrastructure | Hybrid search (BM25 + vector) | `Services/Memory/HybridSearchService.php` | Phase 1, 2, 3 | - -### OpenClaw Patterns Adapted - -| Pattern | OpenClaw | OpenCompany Adaptation | -|---------|----------|----------------------| -| Storage | SQLite per agent with sqlite-vec | Single PostgreSQL + pgvector, scoped by `agent_id` | -| Chunking | 400 tokens, 80 overlap | 512 tokens, 64 overlap (from `config/memory.php`) | -| Embedding | Pluggable, OpenAI primary | OpenAI text-embedding-3-small, cache by SHA256 | -| Hybrid weights | 0.7 vector + 0.3 BM25 | Same (from `config/memory.php`) | -| Compaction | Chunk-based, 0.4 keep ratio | Same, with cumulative summaries | -| Memory flush | Silent turn with NO_REPLY | Silent turn with `[FLUSH_COMPLETE]`, response discarded | -| Result clamping | maxResults=6, maxSnippetChars=700, maxInjectedChars=4000 | Same | -| Batch embeddings | Disabled by default; opt-in for large backfills | Always available via `embedBatch()`; used in bulk indexing jobs | -| Memory init timing | QMD eager-initialized on gateway startup (non-blocking) | Lazy via Laravel DI container (pgvector always warm) | -| Memory scope | Per-agent directory isolation + collection scoping (QMD `-c ` args) | Per-agent `agent_id` column + `collection` column scoping | -| Memory write tool | No dedicated save tool (uses filesystem write/edit directly) | Dedicated `save_memory` tool with `target` param | -| Memory read tools | `memory_search` (semantic) + 
`memory_get` (read by path/line) | `recall_memory` (semantic search over all LTM) | -| Memory files | `MEMORY.md` (curated) + `memory/YYYY-MM-DD.md` (daily logs) | Same layout under `agents/{slug}/` | -| MEMORY.md loading | Only in private sessions (scope: `chatType: "direct"`) | Always loaded via identity file system | -| Session memory hook | On `/new` command, saves transcript to `memory/YYYY-MM-DD-slug.md` | Not applicable (our sessions are persistent) | - ---- - -## Appendix A: OpenClaw Memory Architecture — Deep Dive - -> This appendix documents OpenClaw's exact memory implementation as a reference for our adaptation. -> Source: `inspiration/openclaw/` (v2026.2.9, Feb 2026) - -### A.1 Memory Files & Layout - -OpenClaw uses **plain Markdown in the agent workspace** as the source of truth: - -| File | Purpose | When loaded | Write pattern | -|------|---------|-------------|---------------| -| `MEMORY.md` | Curated long-term memory | **Only in private/direct sessions** (never in group contexts) | Overwrite/update by agent (using `write`/`edit` tools) | -| `memory/YYYY-MM-DD.md` | Daily log entries | Today + yesterday loaded at session start | Append-only | -| `memory/YYYY-MM-DD-slug.md` | Session transcripts | Not auto-loaded; searchable via `memory_search` | Created by `session-memory` hook on `/new` command | - -Key insight: **MEMORY.md is NOT always loaded.** OpenClaw explicitly skips it in group chats for privacy. Our system loads MEMORY.md always (via the identity file pipeline), which is simpler but means we should be careful about what agents store there. - -### A.2 Memory Tools (Read-Only!) - -**OpenClaw has NO dedicated `save_memory` tool.** Agents write to memory using the standard filesystem tools (`write`, `edit`, `exec`). 
The memory tools are read-only: - -```typescript -// memory_search — Semantic search over MEMORY.md + memory/*.md -description: "Mandatory recall step: semantically search MEMORY.md + memory/*.md -(and optional session transcripts) before answering questions about prior work, -decisions, dates, people, preferences, or todos; returns top snippets with path + lines." - -// memory_get — Read specific file content by path + line range -description: "Safe snippet read from MEMORY.md or memory/*.md with optional from/lines; -use after memory_search to pull only the needed lines and keep context small." -``` - -This means OpenClaw's agents must know the file layout and manually write to the correct paths. Our `save_memory` tool abstracts this complexity away, which is better for our multi-agent setup where agents shouldn't need filesystem knowledge. - -### A.3 System Prompt — Memory Section - -OpenClaw's system prompt includes a `## Memory Recall` section (only for non-subagent sessions that have memory tools enabled): - -``` -## Memory Recall -Before answering anything about prior work, decisions, dates, people, preferences, -or todos: run memory_search on MEMORY.md + memory/*.md; then use memory_get to pull -only the needed lines. If low confidence after search, say you checked. -Citations: include Source: when it helps the user verify memory snippets. -``` - -This is notably **recall-focused** — it tells the agent when to search, not when to save. OpenClaw relies on the memory docs (`docs/concepts/memory.md`) and the flush prompt to drive save behavior. - -### A.4 Memory Flush Prompts (Pre-Compaction) - -OpenClaw's flush uses two prompts injected during the silent turn: - -**System prompt append:** -``` -Pre-compaction memory flush turn. -The session is near auto-compaction; capture durable memories to disk. -You may reply, but usually NO_REPLY is correct. -``` - -**User message (the flush trigger):** -``` -Pre-compaction memory flush. 
Store durable memories now -(use memory/YYYY-MM-DD.md; create memory/ if needed). -If nothing to store, reply with NO_REPLY. -``` - -Key details: -- Flush targets **daily logs** (`memory/YYYY-MM-DD.md`), not MEMORY.md -- `NO_REPLY` is a sentinel token that suppresses delivery to the user -- One flush per compaction cycle (tracked via `memoryFlushCompactionCount` in session store) -- Flush is **skipped** for read-only sandboxed workspaces and CLI providers -- Soft threshold: triggers 4000 tokens before compaction would fire - -### A.5 Compaction - -OpenClaw's compaction is handled by the embedded Pi agent runtime, not by OpenClaw itself: - -- **Trigger**: `contextTokens > contextWindow - reserveTokens` -- **Output**: A `compaction` entry in the JSONL transcript with `firstKeptEntryId` and `tokensBefore` -- **Effect**: Future turns see compaction summary + messages after the kept entry -- **Config**: `reserveTokens: 16384`, `keepRecentTokens: 20000` -- **Safety floor**: Minimum 20000 tokens reserve to ensure room for pre-compaction flush -- **Boot sync**: `memory.qmd.update.waitForBootSync` (default `false`) — when `true`, QMD boot refresh blocks startup. Default non-blocking behavior means first searches may hit a partially warmed index. -- **Manual**: `/compact` command (optionally with focus instructions) -- **Compaction is persistent** in the JSONL transcript, unlike session pruning (which is in-memory only) - -### A.6 Session-Memory Hook - -When the user runs `/new` (start a new session), OpenClaw's `session-memory` hook: - -1. Reads the last N messages (default 15) from the previous session -2. Uses LLM to generate a descriptive slug (e.g., "api-design", "bug-fix") -3. Saves to `memory/YYYY-MM-DD-slug.md` with session metadata -4. 
This is separate from the pre-compaction flush — it's a session-end memory capture - -### A.7 Workspace Files Loaded Into System Prompt - -OpenClaw loads these workspace files as "Project Context" in the system prompt: - -``` -IDENTITY.md → Agent identity and personality -SOUL.md → Persona and tone guidance -USER.md → User preferences and context -AGENTS.md → Multi-agent awareness -TOOLS.md → External tool guidance -MEMORY.md → Curated long-term memory (private sessions only!) -HEARTBEAT.md → Heartbeat prompt config -BOOTSTRAP.md → First-run bootstrapping (only for brand new workspaces) -``` - -For subagent sessions, only `AGENTS.md` and `TOOLS.md` are loaded (privacy/scope restriction). - -### A.8 Thesis Verification - -**Claim 1: "If the agent saves memory, isn't it always in MEMORY.md?"** - -**Partially incorrect.** In OpenClaw, agents write to TWO locations: -- `MEMORY.md` — for curated, high-value, durable facts (preferences, key decisions) -- `memory/YYYY-MM-DD.md` — for daily running context, timestamped observations - -The **memory flush explicitly targets daily logs**, not MEMORY.md: `"Store durable memories now (use memory/YYYY-MM-DD.md)"`. OpenClaw's docs say: "Decisions, preferences, and durable facts go to MEMORY.md. Day-to-day notes and running context go to memory/YYYY-MM-DD.md." - -**Claim 2: "Short-term memory is like summaries of compactions?"** - -**Correct.** STM is the conversation context window. When it fills up: -1. Pre-compaction flush saves important context to LTM (daily logs) -2. Compaction summarizes older messages into a persistent summary entry -3. Future turns see: compaction summary + recent messages - -The compaction summary IS the compressed form of STM. It's lossy — hence why the flush exists to promote key info to LTM before compression. - -### A.9 v2026.2.9 Changes - -The following changes were introduced in OpenClaw v2026.2.9 (tag `v2026.2.9`, Feb 2026): - -**1. 
Config migration: top-level → agents.defaults.memorySearch** -`memorySearch` config moved from top-level to `agents.defaults.memorySearch`. A legacy migration rule auto-migrates old configs and logs a deprecation warning. OpenClaw's `docs/concepts/memory.md` now explicitly states: *"Configure memory search under `agents.defaults.memorySearch` (not top-level `memorySearch`)."* Per-agent overrides take precedence over the new default location. - -**2. QMD eager initialization** -New `server-startup-memory.ts` module. The QMD memory manager is now initialized immediately on gateway startup (fire-and-forget, non-blocking) instead of lazily on first `memory_search` call. Update/embed timers are armed immediately. This addresses the documented "first search may be slow" problem by warming up GGUF models during startup. Boot refresh runs in background by default; set `memory.qmd.update.waitForBootSync = true` for blocking behavior. - -**3. Collection scoping** -New `buildCollectionFilterArgs()` method in `QmdMemoryManager`. QMD queries are now scoped to managed collections via `-c ` CLI args. If no managed collections are configured, the query returns empty results and logs a warning instead of searching undefined scope. Prevents accidental data exposure from misconfigured setups. - -**4. Model cache reuse** -New `symlinkSharedModels()` method. Symlinks the shared `~/.cache/qmd/models/` directory into each agent-specific `XDG_CACHE_HOME` path. This solves the problem of agent isolation (per-agent `XDG_CACHE_HOME` override) causing re-downloads of ~2.1 GB GGUF models. Result: per-agent index isolation + globally shared ML models. Cross-platform: handles `XDG_CACHE_HOME` on Linux/macOS and `LOCALAPPDATA` on Windows. Not directly applicable to our architecture (we use API-based embeddings, not local models). - -**5. Batch embeddings default off** -`agents.defaults.memorySearch.remote.batch.enabled` default changed from `true` to `false`. Batch API is now opt-in. 
Rationale: synchronous embedding is adequate for incremental updates; batch is mainly beneficial for large backfills. Providers supporting batch: OpenAI Batch API, Gemini async embeddings, Voyage AI. - -**6. ChatType unification (`dm` → `direct`)** -Session key parsing in `QmdMemoryManager.extractAgentIdFromSessionKey()` now accepts both `"direct"` and `"dm"` for backward compatibility. New sessions use `":direct:"` in generated keys. The QMD scope default rule uses `chatType: "direct"`. This is a semantic rename — `"direct"` is clearer than `"dm"`. - -> Note: Utility consolidation (commit ec910a235, `formatError` → `formatErrorMessage`) is an internal refactor with no architectural impact. - ---- - -## Appendix B: Complete System Prompt for Memory - -> This is the full memory-related system prompt text to inject into `OpenCompanyAgent::instructions()`. -> It should be appended after the identity files are loaded, so the agent already has MEMORY.md context. - -``` -## Memory System - -You have two types of memory: - -### Short-Term Memory (STM) -Your current conversation context. You can see recent messages directly. Older messages -are automatically summarized when the context window fills up. **You don't manage STM** — -the system handles it for you. - -### Long-Term Memory (LTM) -Durable memories that persist across all conversations. You manage LTM explicitly: - -- **MEMORY.md** (core memory) — Already loaded in your system prompt above. Contains - curated, high-value facts. You can add to it via `save_memory` with `target: "core"`. -- **Daily logs** — Timestamped entries searchable via `recall_memory`. Written via - `save_memory` with `target: "log"` (default). - -### save_memory - -Persist information to your long-term memory. 
Two targets: - -| Target | Storage | Loaded | Best for | -|--------|---------|--------|----------| -| `"core"` | MEMORY.md | Always (system prompt) | User preferences, key decisions, organizational knowledge, durable facts | -| `"log"` (default) | Daily log | On demand via `recall_memory` | Running context, timestamped observations, session learnings | - -Guidelines: -- Be specific: include who, what, why, and when -- Prefer `"core"` only for truly durable facts that should always be in context -- Prefer `"log"` for most saves — keeps MEMORY.md focused and manageable -- If someone says "remember this" — save it immediately (do not rely on conversation context) -- Use categories: preference, decision, learning, fact, general - -### recall_memory - -Semantically search your daily logs for past context. Use this: -- Before answering questions about prior work, decisions, or preferences -- At the start of complex tasks to gather relevant history -- When a user references something from a previous conversation -- When you have low confidence and need more context - -If recall_memory returns nothing relevant, tell the user you checked but found no prior context. - -### When to save vs when not to - -**Save:** -- User expresses a preference or working style -- An important decision is made (with reasoning) -- Key facts about a project, person, or the organization -- Learnings or insights from the current conversation -- Anything the user explicitly asks you to remember - -**Don't save:** -- Transient, obvious, or trivial information -- Information already in MEMORY.md -- Raw conversation snippets without context -- Temporary task state that won't matter later -``` - -### Updated Phase 5 Flush Prompt - -The flush prompt should align with OpenClaw's proven approach — target daily logs, be concise: - -```php -private function buildFlushPrompt(): string -{ - return <<<'PROMPT' - Pre-compaction memory flush. 
Your conversation context is about to be compacted - (older messages will be summarized and compressed). - - Review the conversation for durable context worth preserving. Use save_memory - (target: "log") to save important observations, decisions, preferences, or - learnings to your daily log before they are compressed. - - Only save to target: "core" if you discovered truly high-value permanent facts - (user preferences, key decisions) that should always be in your system prompt. - - If nothing needs saving, respond with exactly: [FLUSH_COMPLETE] - PROMPT; -} -``` diff --git a/docs/testing/feature-test-map.md b/docs/testing/feature-test-map.md deleted file mode 100644 index e6b2f53..0000000 --- a/docs/testing/feature-test-map.md +++ /dev/null @@ -1,1093 +0,0 @@ -# OpenCompany Feature Test Map - -Complete checklist of all features, buttons, and functionality to test. - ---- - -## 1. AUTHENTICATION PAGES - -### Login (`/login`) -- [ ] Email input field -- [ ] Password input field -- [ ] "Remember me" checkbox -- [ ] Login button (submit) -- [ ] "Forgot password" link -- [ ] Register link -- [ ] Error states for invalid credentials -- [ ] Loading state on submit - -### Register (`/register`) -- [ ] Name input field -- [ ] Email input field -- [ ] Password input field -- [ ] Confirm password input field -- [ ] Register button (submit) -- [ ] Login link -- [ ] Validation errors display -- [ ] Loading state on submit - -### Forgot Password (`/forgot-password`) -- [ ] Email input field -- [ ] Send reset link button -- [ ] Success message display -- [ ] Back to login link - -### Reset Password (`/reset-password/{token}`) -- [ ] Password input field -- [ ] Confirm password input field -- [ ] Reset password button -- [ ] Validation errors - -### Verify Email (`/verify-email`) -- [ ] Resend verification email button -- [ ] Success message display - ---- - -## 2. 
DASHBOARD (`/` or `/dashboard`) - -### Header -- [ ] Page title displays -- [ ] Subtitle displays - -### Stats Overview -- [ ] Agents Online stat card -- [ ] Pending Tasks stat card -- [ ] Unread Messages stat card -- [ ] Each stat shows correct number - -### Pending Approvals Section (if any) -- [ ] Approval cards display -- [ ] Approve button per item -- [ ] Reject button per item -- [ ] Amount display -- [ ] Requester info display -- [ ] View all link - -### Activity Feed -- [ ] Activity items load -- [ ] Activity type icons display -- [ ] Timestamps display -- [ ] User/agent avatars display -- [ ] Activity descriptions -- [ ] Load more (if > 20 items) - -### Quick Actions -- [ ] "Spawn Agent" button → opens modal -- [ ] "New Channel" button → opens modal -- [ ] "Create Task" button → opens modal -- [ ] "New Document" button → navigates - -### Working Agents Sidebar -- [ ] Agent cards display -- [ ] Agent status indicators (working/idle) -- [ ] Current task display -- [ ] Click agent → navigate to profile - -### Spawn Agent Modal -- [ ] Agent type selection (6 types: writer, analyst, researcher, creative, coder, coordinator) -- [ ] Agent name input -- [ ] Initial task textarea (optional) -- [ ] Behavior mode select (autonomous/supervised/strict) -- [ ] Ephemeral agent toggle -- [ ] Estimated cost display -- [ ] Cancel button -- [ ] Spawn Agent button -- [ ] Loading state on spawn - ---- - -## 3. 
CHAT (`/chat`) - -### Channel List Sidebar -- [ ] Channel items display -- [ ] Unread count badges -- [ ] Channel type icons (public/private/agent/dm/external) -- [ ] Selected channel highlight -- [ ] "New Channel" button -- [ ] Search channels (if available) - -### Create Channel Modal -- [ ] Channel type selection (public/private/agent/dm/external) -- [ ] Channel name input (validation: lowercase, hyphens) -- [ ] Description textarea -- [ ] Member search input -- [ ] Available members list -- [ ] Selected members chips with X buttons -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Chat Area -- [ ] Channel header with name -- [ ] Member count display -- [ ] Pinned messages button with count -- [ ] Members info button -- [ ] Messages load correctly -- [ ] Message grouping by author -- [ ] Date separators display -- [ ] Avatar display per message -- [ ] Timestamp per message -- [ ] Scroll to bottom on new messages -- [ ] Load more old messages (scroll up) - -### Message Features -- [ ] Hover actions appear on messages -- [ ] React to message (emoji picker) -- [ ] Reply to message (thread) -- [ ] Pin message button -- [ ] Edit own message -- [ ] Delete own message -- [ ] Message reactions display -- [ ] Reaction counts - -### Message Input -- [ ] Textarea for typing -- [ ] Auto-resize on multi-line -- [ ] Attach file button (+) -- [ ] Emoji picker button -- [ ] Mention button (@) -- [ ] Send button -- [ ] Enter to send (Shift+Enter for newline) -- [ ] Character counter (if enabled) -- [ ] Format toolbar (bold, italic, code, etc.) 
-- [ ] @mention autocomplete popup -- [ ] Slash commands popup (/) -- [ ] Attachment preview with upload progress -- [ ] Reply-to banner (when replying) -- [ ] Cancel reply button -- [ ] Edit mode banner -- [ ] Cancel edit button - -### Channel Info Panel -- [ ] Toggle open/close -- [ ] Channel description -- [ ] Member list with avatars -- [ ] Member roles/types -- [ ] Add member button - -### Add Member Modal -- [ ] Search users input -- [ ] User list with selection checkboxes -- [ ] Selected count display -- [ ] Cancel button -- [ ] Add Members button - -### Pinned Messages Panel -- [ ] Toggle open/close -- [ ] Pinned messages list -- [ ] Click to jump to message -- [ ] Unpin button - -### Typing Indicator -- [ ] Shows when others typing -- [ ] Multiple users typing text - ---- - -## 4. DIRECT MESSAGES - -> **Note:** `/messages` now redirects to `/chat`. DMs are part of the unified chat interface and appear as `dm` type channels in the channel list. - -### DM Conversations (via `/chat`) -- [ ] DM channels appear in channel list -- [ ] DM channel type icon distinct from other types -- [ ] "New Message" button -- [ ] Search conversations input -- [ ] Avatar per conversation -- [ ] Last message preview -- [ ] Time ago display -- [ ] Unread count badges -- [ ] Click to open conversation -- [ ] Loading skeleton state -- [ ] Empty state if no conversations - -### New Message Modal -- [ ] Recipient select dropdown -- [ ] User/agent list with type labels -- [ ] Cancel button -- [ ] Start Chat button - -### Conversation View (`/messages/{id}`) -- [ ] Floating header with back button -- [ ] User/agent avatar and name -- [ ] User type label -- [ ] Status indicator (for agents) -- [ ] Settings/gear button (for agents) -- [ ] Profile link button -- [ ] Messages display -- [ ] Own messages right-aligned (dark bubble) -- [ ] Other messages left-aligned (light bubble) -- [ ] Avatar grouping (hide repeated) -- [ ] Timestamps per message -- [ ] Markdown rendering (bold, italic, 
code, links, lists) -- [ ] Code blocks with syntax highlighting -- [ ] Typing indicator -- [ ] Message input textarea -- [ ] Auto-resize input -- [ ] Send button -- [ ] Loading state on send -- [ ] Empty state for new conversations - ---- - -## 5. TASKS (`/tasks`) - -### Header -- [ ] Page title "Tasks" -- [ ] Filter tabs by status (All/Active/Pending/Completed/Failed) -- [ ] Filter by agent -- [ ] Filter by priority -- [ ] Filter by type -- [ ] "Create Task" button - -### Task List -- [ ] Task rows display -- [ ] Task title -- [ ] Type badge (ticket/request/analysis/content/research/custom) -- [ ] Status badge with color (pending/active/paused/completed/failed/cancelled) -- [ ] Priority badge (low/medium/high/urgent) -- [ ] Assigned agent with avatar -- [ ] Due date display -- [ ] Click to open task detail - -### Create Task Modal -- [ ] Title input (required) -- [ ] Description textarea -- [ ] Type select (ticket/request/analysis/content/research/custom) -- [ ] Priority select (low/medium/high/urgent) -- [ ] Agent assignment select -- [ ] Due date input -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Task Detail View (`/tasks/{id}`) -- [ ] Task title display -- [ ] Type badge -- [ ] Status badge with color -- [ ] Priority badge -- [ ] Description display -- [ ] Assigned agent with avatar -- [ ] Requester info -- [ ] Channel link (if linked) -- [ ] Due date -- [ ] Created/started/completed timestamps -- [ ] Lifecycle action buttons: - - [ ] Start button (pending → active) - - [ ] Pause button (active → paused) - - [ ] Resume button (paused → active) - - [ ] Complete button (active → completed) - - [ ] Fail button (active → failed) - - [ ] Cancel button (any → cancelled) - -### Task Steps -- [ ] Steps list display -- [ ] Step description -- [ ] Step type badge (action/decision/approval/sub_task/message) -- [ ] Step status indicator (pending/in_progress/completed/skipped) -- [ ] Step timestamps -- [ ] Step metadata display - -### Sub-Tasks -- [ 
] Sub-task list (if parent task) -- [ ] Sub-task status indicators -- [ ] Click to open sub-task - ---- - -## 6. LISTS (`/lists`) - -### Header -- [ ] Page title "Lists" -- [ ] View mode tabs (Board/List) -- [ ] Filter dropdown -- [ ] "Create Item" button - -### Board View (Kanban) -- [ ] Backlog column with count -- [ ] In Progress column with count -- [ ] Done column with count -- [ ] Item cards in each column -- [ ] Drag and drop between columns -- [ ] Item card: title, priority badge, assignee avatar, cost - -### List View -- [ ] Item rows in table format -- [ ] Sortable columns -- [ ] Item details visible - -### Create Item Modal -- [ ] Title input (required) -- [ ] Description textarea -- [ ] Status select (backlog/in_progress/done) -- [ ] Priority select (low/medium/high/urgent) -- [ ] Assignee select (grouped: agents/humans) -- [ ] Estimated cost input -- [ ] Channel select (optional) -- [ ] Cancel button -- [ ] Create button -- [ ] Loading state - -### Item Detail Slideover -- [ ] Item title display -- [ ] Edit button → edit mode -- [ ] Close (X) button -- [ ] Status badge with color -- [ ] Priority badge with color -- [ ] Description display -- [ ] Assignee with avatar -- [ ] Cost display -- [ ] Created date -- [ ] Completed date (if done) -- [ ] Mark Complete button -- [ ] Reopen button (if done) -- [ ] Delete button -- [ ] Collaborators section -- [ ] Comments section -- [ ] Add comment input -- [ ] Comment list -- [ ] Delete comment (hover reveal) -- [ ] Edit mode: editable title -- [ ] Edit mode: editable description -- [ ] Edit mode: status select -- [ ] Edit mode: priority select -- [ ] Edit mode: cost input -- [ ] Save/Cancel buttons in edit mode - ---- - -## 7. 
DOCUMENTS (`/docs`) - -### Document List Sidebar -- [ ] Search documents input -- [ ] Document tree/list display -- [ ] Document icons -- [ ] Selected document highlight -- [ ] "New Document" button -- [ ] Folder structure (if any) - -### Document Viewer/Editor -- [ ] Document title display -- [ ] Edit button -- [ ] Version history button -- [ ] Comments toggle button -- [ ] Attachments button -- [ ] Document content display -- [ ] Markdown rendering -- [ ] Code blocks with highlighting -- [ ] Edit mode: textarea/editor -- [ ] Save button in edit mode -- [ ] Cancel edit button - -### Version History Panel -- [ ] Version list display -- [ ] Version timestamps -- [ ] Version author -- [ ] Change description -- [ ] View diff button per version -- [ ] Restore version button -- [ ] Current version indicator - -### Diff Viewer Modal -- [ ] Side-by-side diff view -- [ ] Additions highlighted (green) -- [ ] Deletions highlighted (red) -- [ ] Version labels -- [ ] Close button - -### Comments Panel -- [ ] Comments list -- [ ] Comment author avatars -- [ ] Comment timestamps -- [ ] Reply to comment -- [ ] Resolve comment button -- [ ] Resolved comments section -- [ ] Add comment input -- [ ] Submit comment button - -### Attachments Panel -- [ ] Attachments list -- [ ] File icons -- [ ] File names -- [ ] Download button per file -- [ ] Delete button per file -- [ ] Upload attachment button - ---- - -## 8. ACTIVITY (`/activity`) - -### Header -- [ ] Page title -- [ ] Filter options - -### Filter Panel -- [ ] Activity type filters (messages/tasks/approvals/agents/errors) -- [ ] User filter dropdown -- [ ] Date range filters (today/week/month/all) - -### Activity Timeline -- [ ] Activity items display -- [ ] Type icons per activity -- [ ] User/agent avatars -- [ ] Timestamps -- [ ] Activity descriptions -- [ ] Metadata (task titles, amounts, channels) -- [ ] Load more button -- [ ] Empty state if no activities - ---- - -## 9. 
APPROVALS (`/approvals`) - -### Header -- [ ] Page title -- [ ] Filter tabs with counts - -### Filter Tabs -- [ ] All tab -- [ ] Pending tab (with count) -- [ ] Approved tab -- [ ] Rejected tab - -### Approval List -- [ ] Approval cards display -- [ ] Request title -- [ ] Description -- [ ] Amount display -- [ ] Requester info with avatar -- [ ] Status badge -- [ ] Approve button (pending only) -- [ ] Reject button (pending only) -- [ ] Responder info (approved/rejected) -- [ ] Response timestamp -- [ ] Loading state -- [ ] Empty state per filter - ---- - -## 10. AUTOMATION (`/automation`) - -### Header -- [ ] Page title -- [ ] Tab navigation - -### Task Templates Tab -- [ ] Templates list display -- [ ] "New Template" button -- [ ] Template cards with: - - [ ] Template name - - [ ] Default title - - [ ] Priority badge - - [ ] Default assignee - - [ ] Estimated cost - - [ ] Tags display - - [ ] Edit button - - [ ] Delete button - - [ ] Use template button - -### Template Modal (Create/Edit) -- [ ] Template name input -- [ ] Default title input -- [ ] Default priority select -- [ ] Default assignee select -- [ ] Estimated cost input -- [ ] Tags input -- [ ] Cancel button -- [ ] Save button - -### Automation Rules Tab -- [ ] Rules list display -- [ ] "New Rule" button -- [ ] Rule cards with: - - [ ] Rule name - - [ ] Trigger type - - [ ] Action type - - [ ] Template association - - [ ] Enabled/disabled toggle - - [ ] Trigger count - - [ ] Edit button - - [ ] Delete button - -### Rule Modal (Create/Edit) -- [ ] Rule name input -- [ ] Trigger type select (task created/completed/assigned/approval) -- [ ] Action type select (create task/assign/notify/spawn agent) -- [ ] Template select (if action = create task) -- [ ] Enabled toggle -- [ ] Cancel button -- [ ] Save button - ---- - -## 11. 
ORGANIZATION (`/org`) - -### Header -- [ ] Page title -- [ ] Subtitle - -### View Mode Toggle -- [ ] Tree View button -- [ ] Chart View button -- [ ] Active state on selected - -### Tree View -- [ ] Tree structure displays -- [ ] Node cards with avatars -- [ ] Agent type badges -- [ ] Status indicators (working/idle) -- [ ] Current task display -- [ ] Email for humans -- [ ] Ephemeral badge if applicable -- [ ] Expand/collapse children -- [ ] Expand indicator with count -- [ ] Click to expand/collapse -- [ ] Keyboard navigation (Tab, Enter, Space) -- [ ] Focus ring on keyboard focus -- [ ] Profile link per node - -### Chart View -- [ ] Horizontal org chart displays -- [ ] Node cards with avatars -- [ ] Connector lines between nodes -- [ ] Root node highlighted -- [ ] Agent/human icons -- [ ] Ephemeral badge -- [ ] Focus indicator on cards -- [ ] Profile link per node - -### Stats Section -- [ ] Total Members stat card -- [ ] Humans stat card -- [ ] Agents stat card -- [ ] Active Agents stat card -- [ ] Correct counts displayed - ---- - -## 12. WORKLOAD (`/workload`) - -### Summary Cards -- [ ] Active Agents card -- [ ] Current Tasks card -- [ ] Completed Today card -- [ ] Average Efficiency card - -### Agent Workload Cards -- [ ] Agent cards display -- [ ] Agent avatar with status -- [ ] Agent name and type -- [ ] Workload score bar -- [ ] Efficiency percentage -- [ ] Tasks in progress count -- [ ] Tasks pending count -- [ ] Tasks completed count -- [ ] Total cost display -- [ ] Status badge - -### Auto-refresh -- [ ] Data refreshes every 30 seconds -- [ ] Loading indicator on refresh - ---- - -## 13. 
CALENDAR (`/calendar`) - -### Sidebar -- [ ] Mini calendar display -- [ ] Date selection -- [ ] Today highlight -- [ ] Month navigation - -### View Mode Buttons -- [ ] Month view button -- [ ] Week view button -- [ ] Day view button - -### Calendar Grid -- [ ] Month view: full month grid -- [ ] Week view: 7 days with hours -- [ ] Day view: single day with hours -- [ ] Events display on dates -- [ ] Click date to create event -- [ ] Click event to view/edit - -### Navigation -- [ ] Previous period button -- [ ] Next period button -- [ ] Today button -- [ ] Period label (dynamic) - -### Event Modal -- [ ] Event title input -- [ ] Date/time inputs -- [ ] Description textarea -- [ ] Cancel button -- [ ] Save button -- [ ] Delete button (edit mode) - ---- - -## 14. SETTINGS (`/settings`) - -### Organization Settings -- [ ] Organization name input -- [ ] Organization email input -- [ ] Timezone select -- [ ] Save button - -### Agent Defaults -- [ ] Default behavior mode select -- [ ] Cost limit input -- [ ] Auto-spawn toggle -- [ ] Save button - -### Action Policies -- [ ] Policies list -- [ ] "Add Policy" button -- [ ] Policy card: pattern, threshold, approval level -- [ ] Edit policy button -- [ ] Delete policy button - -### Policy Modal -- [ ] Pattern input -- [ ] Cost threshold input -- [ ] Approval level select -- [ ] Cancel button -- [ ] Save button - -### Notifications -- [ ] Email notifications toggle -- [ ] Slack notifications toggle -- [ ] Daily summary toggle -- [ ] Save button - -### Danger Zone -- [ ] Pause all agents button -- [ ] Reset agent memory button -- [ ] Delete organization button -- [ ] Confirmation dialogs for each - ---- - -## 15. 
INTEGRATIONS (`/integrations`) - -### Webhooks Section -- [ ] Webhooks list -- [ ] "Create Webhook" button -- [ ] Webhook cards: - - [ ] URL display - - [ ] Target/events display - - [ ] Enabled/disabled toggle - - [ ] Last triggered date - - [ ] Call count - - [ ] Edit button - - [ ] Delete button - -### Webhook Modal -- [ ] URL input -- [ ] Target selection -- [ ] Events multiselect -- [ ] Cancel button -- [ ] Save button - -### API Keys Section -- [ ] API keys list -- [ ] "Generate Key" button -- [ ] Key cards: - - [ ] Key name - - [ ] Masked key value - - [ ] Copy button - - [ ] Revoke button - - [ ] Created date - -### Connected Services -- [ ] Services list/grid -- [ ] Service icons -- [ ] Service names -- [ ] Connection status -- [ ] Connect/Disconnect buttons - ---- - -## 16. TABLES (`/tables`) - -### Header -- [ ] Page title -- [ ] "New Table" button - -### Tables Grid -- [ ] Table cards display -- [ ] Table icons -- [ ] Table names -- [ ] Descriptions -- [ ] Row counts -- [ ] Column counts -- [ ] Click to open table -- [ ] Delete button per table - -### Create Table Modal -- [ ] Table name input -- [ ] Description textarea -- [ ] Icon selection (optional) -- [ ] Cancel button -- [ ] Create button - -### Empty State -- [ ] Empty state message -- [ ] Create table button - ---- - -## 17. 
TABLE VIEW (`/tables/{id}`) - -### Header -- [ ] Back button -- [ ] Table icon -- [ ] Table name -- [ ] Table description -- [ ] "Add Column" button -- [ ] "Add Row" button - -### Toolbar -- [ ] Search rows input -- [ ] Selected count display -- [ ] Bulk delete button (when selected) -- [ ] Row count display - -### Table Grid -- [ ] Column headers -- [ ] Column type indicators -- [ ] Column menu button (hover) -- [ ] Row selection checkboxes -- [ ] Cell data display per type: - - [ ] Text: inline edit - - [ ] Number: inline edit - - [ ] Date: date picker - - [ ] Checkbox: toggle - - [ ] Select: dropdown - - [ ] Multiselect: tags with add/remove - - [ ] URL: link display, edit button - - [ ] Email: mailto link, edit button -- [ ] Row actions menu (hover) -- [ ] Delete row button - -### Column Menu -- [ ] Edit column option -- [ ] Delete column option - -### Add Column Modal -- [ ] Column name input -- [ ] Column type selection grid -- [ ] Type descriptions -- [ ] Options input (for select/multiselect) -- [ ] Required toggle -- [ ] Cancel button -- [ ] Add Column button - -### Edit Column Modal -- [ ] Pre-filled column name -- [ ] Type change warning -- [ ] Options editing -- [ ] Cancel button -- [ ] Save Changes button - -### Bulk Delete Confirmation -- [ ] Confirmation message with count -- [ ] Cancel button -- [ ] Delete Rows button - ---- - -## 18. 
AGENT PROFILE (`/agent/{id}`) - -### Header -- [ ] Agent avatar with status -- [ ] Agent name -- [ ] Agent type badge -- [ ] Status badge (working/idle/paused) -- [ ] Emoji display -- [ ] Current task display -- [ ] Message button -- [ ] Pause/Resume button - -### Tabs -- [ ] Overview tab -- [ ] Personality tab -- [ ] Instructions tab -- [ ] Capabilities tab -- [ ] Memory tab -- [ ] Activity tab -- [ ] Settings tab - -### Overview Tab -- [ ] Agent summary -- [ ] Recent activity -- [ ] Quick stats - -### Personality Tab -- [ ] Personality editor textarea -- [ ] Save button - -### Instructions Tab -- [ ] Instructions editor textarea -- [ ] Save button - -### Capabilities Tab -- [ ] Capabilities list -- [ ] Capability enabled/disabled status -- [ ] Approval tracking per capability - -### Memory Tab -- [ ] Memory entries list -- [ ] Add memory button -- [ ] Clear memory button - -### Activity Tab -- [ ] Activity log -- [ ] Activity type icons -- [ ] Timestamps -- [ ] Load more - -### Settings Tab -- [ ] Agent-specific settings -- [ ] Session management -- [ ] Save button - ---- - -## 19. USER PROFILE (`/profile/{id}`) - -### Header -- [ ] User avatar -- [ ] User name -- [ ] User type badge (human/agent) -- [ ] Email display -- [ ] Ephemeral indicator (if agent) -- [ ] Status display -- [ ] Current task (if agent) -- [ ] Message button -- [ ] Manage Agent button (if agent) - -### Tabs -- [ ] Activity tab -- [ ] Tasks tab - -### Activity Tab -- [ ] Activity steps list -- [ ] Status indicators -- [ ] Timestamps - -### Tasks Tab -- [ ] Assigned tasks list -- [ ] Task status badges -- [ ] Click to open task - ---- - -## 20. 
PROFILE EDIT (`/profile`) - -### Update Profile Form -- [ ] Name input -- [ ] Email input -- [ ] Save button -- [ ] Success message - -### Update Password Form -- [ ] Current password input -- [ ] New password input -- [ ] Confirm password input -- [ ] Save button -- [ ] Validation errors - -### Delete Account Section -- [ ] Delete account button -- [ ] Confirmation modal -- [ ] Password confirmation input -- [ ] Confirm delete button - ---- - -## 21. GLOBAL FEATURES - -### Sidebar Navigation -- [ ] All navigation links work -- [ ] Active state on current page -- [ ] Badge counts (Chat, Approvals) -- [ ] Collapse/expand (if available) - -### User Menu -- [ ] User avatar click -- [ ] Username display -- [ ] Role display -- [ ] Profile link -- [ ] Settings link -- [ ] Logout button - -### Command Palette (Cmd/Ctrl+K) -- [ ] Opens on shortcut -- [ ] Search input autofocus -- [ ] Mode tabs (Commands/Files/Channels/Agents) -- [ ] Recent searches display -- [ ] Command groups -- [ ] Arrow key navigation -- [ ] Enter to execute -- [ ] Escape to close -- [ ] Prefix searches (#channels, @agents) - -### Keyboard Shortcuts -- [ ] Cmd/Ctrl+K: Command palette -- [ ] Escape: Close modals/palettes -- [ ] g+h: Go to Dashboard -- [ ] g+c: Go to Chat -- [ ] g+t: Go to Tasks -- [ ] g+d: Go to Docs -- [ ] g+a: Go to Approvals -- [ ] g+o: Go to Organization -- [ ] g+s: Go to Settings - -### Dark Mode -- [ ] Toggle dark mode -- [ ] All pages render correctly -- [ ] All components have dark variants -- [ ] System preference detection - -### Real-Time Updates -- [ ] WebSocket connection establishes -- [ ] New messages appear instantly -- [ ] Typing indicators work -- [ ] Activity feed updates -- [ ] Presence updates - -### Loading States -- [ ] Skeleton loaders display -- [ ] Spinner indicators -- [ ] Button loading states -- [ ] Page transition loading - -### Error States -- [ ] Error messages display -- [ ] Retry buttons work -- [ ] Form validation errors -- [ ] API error handling - 
-### Empty States -- [ ] Empty state messages -- [ ] Call-to-action buttons -- [ ] Helpful descriptions - -### Responsive Design -- [ ] Mobile layout (if supported) -- [ ] Tablet layout -- [ ] Desktop layout -- [ ] Sidebar behavior on resize - ---- - -## 22. SHARED COMPONENTS TO TEST - -### Button -- [ ] Primary variant -- [ ] Secondary variant -- [ ] Ghost variant -- [ ] Danger variant -- [ ] Link variant -- [ ] Outline variant -- [ ] Success variant -- [ ] All sizes (xs/sm/md/lg/xl) -- [ ] Loading state -- [ ] Disabled state -- [ ] With icons (left/right) -- [ ] Icon-only mode -- [ ] Tooltip display - -### Input -- [ ] All types (text/email/password/number/etc) -- [ ] All sizes -- [ ] With label -- [ ] With error message -- [ ] With success indicator -- [ ] Clearable (X button) -- [ ] Copyable (copy button) -- [ ] Password toggle -- [ ] Character counter -- [ ] Disabled state -- [ ] Readonly state - -### Select -- [ ] Dropdown opens/closes -- [ ] Item selection -- [ ] Placeholder display -- [ ] Icon display -- [ ] Disabled state - -### Checkbox -- [ ] Check/uncheck toggle -- [ ] Label display -- [ ] Description display -- [ ] Disabled state - -### Modal -- [ ] Opens/closes -- [ ] Escape key closes -- [ ] Click outside closes (if enabled) -- [ ] Header/content/footer slots -- [ ] All sizes - -### Confirm Dialog -- [ ] Opens on trigger -- [ ] Confirm button works -- [ ] Cancel button works -- [ ] Input validation (if required) -- [ ] Checkbox state -- [ ] All variants - -### Badge -- [ ] All variants -- [ ] All styles (soft/solid/outline) -- [ ] With count -- [ ] Removable -- [ ] With icon -- [ ] With avatar - -### Avatar -- [ ] Image display -- [ ] Fallback initials -- [ ] Agent icon fallback -- [ ] Status dot indicator -- [ ] All sizes -- [ ] All shapes -- [ ] Tooltip display - -### Tooltip -- [ ] Hover display -- [ ] All positions -- [ ] Delay works -- [ ] Disabled state - -### Dropdown Menu -- [ ] Opens/closes -- [ ] Item click works -- [ ] Submenu opens -- [ ] 
Keyboard navigation -- [ ] Disabled items - -### Skeleton -- [ ] All presets display correctly -- [ ] Animation works - -### Stat Card -- [ ] Value display -- [ ] Label display -- [ ] Icon display -- [ ] Trend indicator -- [ ] Sparkline chart -- [ ] Progress bar -- [ ] Click interaction - ---- - -## Total Test Items: ~750+ - -Use this checklist to systematically test every feature in the application. diff --git a/docs/todo.md b/docs/todo.md deleted file mode 100644 index f65126f..0000000 --- a/docs/todo.md +++ /dev/null @@ -1,38 +0,0 @@ -# Docs Feature TODO - -## Quick wins - -- [ ] **Document starring/pinning** — Add backend fields + API endpoints, frontend toggle in tree items. Mark important docs for quick access. -- [ ] **Content search** — Extend search to full-text content, not just titles/authors. -- [ ] **Publish controls** — Wire up existing `is_published`/`published_at` backend fields to frontend UI. - -## Medium effort - -- [ ] **Permission management UI** — Share modal with user picker for managing viewer/editor roles after doc creation. -- [ ] **Document sharing** — Share button, copy link, public/private toggle. -- [ ] **Fix DocumentAttachments API wiring** — Replace placeholder stub functions with actual `useApi` composable methods. - -## Bigger features - -- [ ] **Inline/selection-based comments** — Highlight text and comment on specific passages instead of doc-level only. -- [ ] **Document templates** — Pre-built templates for common doc types (meeting notes, specs, proposals, etc.). -- [ ] **Bulk operations** — Multi-select docs for move, delete, export, etc. - ---- - -# Agent System TODO - -## Budget Approval Type - -The `ApprovalRequest` model already has an `amount` column and `type: 'budget'` is defined in the migration, but the budget approval flow is not yet implemented. - -### What it does -Agents requesting approval when the estimated cost of an action exceeds a configurable threshold. 
This prevents runaway API costs from autonomous agents. - -### What needs building -- [ ] **Cost threshold config** — Per-agent or per-workspace `cost_threshold` setting (e.g., $5.00). Could be a column on `users` table or a workspace-level setting. -- [ ] **Cost estimation in tool wrappers** — Before executing an expensive tool (LLM calls, external API calls), estimate the cost and compare against the threshold. -- [ ] **Budget approval creation** — When estimated cost exceeds threshold, create an `ApprovalRequest` with `type: 'budget'` and `amount` set to the estimated cost. -- [ ] **UI amount display** — Show the `amount` field on the Approvals page for budget-type requests, formatted as currency. The Approvals page already has a `formatCurrency` helper and renders `approval.amount` when present. -- [ ] **Running cost tracking** — Track cumulative costs per agent per day/session. Compare running total against threshold, not just individual actions. -- [ ] **Cost threshold UI** — Add cost threshold setting to AgentSettingsPanel (alongside behavior mode and sleep controls). diff --git a/resources/lua-docs/_overview.md b/resources/lua-docs/_overview.md index 166c60f..f9951a8 100644 --- a/resources/lua-docs/_overview.md +++ b/resources/lua-docs/_overview.md @@ -8,6 +8,9 @@ Lua scripts in OpenCompany run in a sandboxed environment with access to workspa app.{namespace}.* — Internal workspace apps app.integrations.{name}.* — Integration-specific tools app.mcp.{server}.* — MCP server tools +json.decode(string) / json.encode(value) — JSON parsing and serialization +regex.match(s, p) / regex.match_all(s, p) — PCRE regex matching +regex.gsub(s, p, r) — PCRE regex substitution ``` Available namespaces are determined by the agent's permissions. Use `lua_list_docs` to see what's available. 
@@ -67,6 +70,79 @@ local sites = dump(app.integrations.plausible.list_sites()) -- prints the table contents, then continues with sites as a variable ``` +### `json.decode(string)` + +Parses a JSON string into a Lua table. Uses PHP's `json_decode` under the hood, so it handles all standard JSON types including nested objects and arrays. + +```lua +-- Parse a JSON string +local data = json.decode('{"items": [1, 2, 3]}') +print(data.items[1]) -- 1 + +-- Parse JSON received from an integration +local raw = app.http.get({url = "https://api.example.com/data"}) +local parsed = json.decode(raw.body) +print(parsed.status) +``` + +Raises an error on invalid JSON. Use `pcall` for error handling: + +```lua +local ok, data = pcall(json.decode, raw_string) +if not ok then + print("Invalid JSON: " .. tostring(data)) +end +``` + +### `json.encode(value)` + +Serializes a Lua table (or any value) to a JSON string. Produces pretty-printed output with unescaped Unicode. + +```lua +print(json.encode({name = "test", count = 42})) +-- { +-- "name": "test", +-- "count": 42 +-- } +``` + +### `regex.match(subject, pattern [, flags])` + +Tests whether `subject` matches the PCRE `pattern`. Returns a table of captures on match, or `nil` on no match. Supports all PCRE features (lookaheads, non-greedy quantifiers, Unicode properties, named groups) that Lua's built-in patterns lack. + +```lua +local m = regex.match("hello world 42", "(\\w+) (\\d+)") +-- m = {"world 42", "world", "42"} (full match, then captures) + +local m = regex.match("no digits here", "\\d+") +-- m = nil + +-- Named capture groups +local m = regex.match("price: $19.99", "(?P<currency>\\$)(?P<amount>[\\d.]+)") +-- m = {"$19.99", "$", "19.99"} +``` + +### `regex.match_all(subject, pattern [, flags])` + +Returns all matches of `pattern` in `subject`. Default flag behavior (`PREG_PATTERN_ORDER`) returns captures grouped by group index. 
+ +```lua +local matches = regex.match_all("foo123bar456baz", "(\\d+)") +-- matches[1] = all full matches, matches[2] = first capture group, etc. +``` + +### `regex.gsub(subject, pattern, replacement [, limit])` + +Replaces all occurrences of `pattern` in `subject` with `replacement`. Returns the resulting string. Supports PCRE backreferences (`$1`, `$2`, etc.). + +```lua +local cleaned = regex.gsub(" hello world ", "\\s+", " ") +-- cleaned = " hello world " + +local s = regex.gsub("aaa", "a", "b", 2) +-- s = "bba" +``` + ## Return Values All `app.*` functions return Lua tables (objects/arrays) on success. On failure, they return `nil, error_message`. Use `pcall` for error handling: @@ -85,4 +161,4 @@ end - **context** — The `ctx` object available in automation scripts - **errors** — Error handling patterns and common error codes -- **examples** — Complete real-world automation examples +- **examples** — Complete real-world automation examples \ No newline at end of file diff --git a/tests/Feature/OpenCompanyAgentTest.php b/tests/Feature/OpenCompanyAgentTest.php index 1bac1c1..dd80fad 100644 --- a/tests/Feature/OpenCompanyAgentTest.php +++ b/tests/Feature/OpenCompanyAgentTest.php @@ -3,22 +3,34 @@ namespace Tests\Feature; use App\Agents\OpenCompanyAgent; +use App\Jobs\IndexDocumentJob; +use App\Models\Channel; use App\Models\Document; +use App\Models\Task; use App\Models\User; use App\Services\AgentDocumentService; +use Illuminate\Support\Facades\Bus; use Illuminate\Foundation\Testing\RefreshDatabase; +use Illuminate\Support\Str; use Tests\TestCase; class OpenCompanyAgentTest extends TestCase { use RefreshDatabase; + protected function setUp(): void + { + parent::setUp(); + + Bus::fake([IndexDocumentJob::class]); + } + public function test_instructions_assembled_from_identity_files(): void { $agent = User::factory()->create([ 'type' => 'agent', 'agent_type' => 'coder', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); // 
Create identity document structure @@ -37,7 +49,7 @@ public function test_instructions_assembled_from_identity_files(): void $this->assertStringContainsString('TestBot', $instructions); $this->assertStringContainsString('INSTRUCTIONS.md', $instructions); $this->assertStringContainsString('Be helpful and accurate', $instructions); - $this->assertStringContainsString('Available Tools', $instructions); + $this->assertStringContainsString('## Tools', $instructions); } public function test_fallback_instructions_when_no_documents(): void @@ -45,7 +57,7 @@ public function test_fallback_instructions_when_no_documents(): void $agent = User::factory()->create([ 'name' => 'TestAgent', 'type' => 'agent', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); $agentInstance = OpenCompanyAgent::for($agent, 'channel-1'); @@ -60,20 +72,20 @@ public function test_provider_and_model_resolved_from_brain(): void { $agent = User::factory()->create([ 'type' => 'agent', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); $agentInstance = OpenCompanyAgent::for($agent, 'channel-1'); - $this->assertEquals('anthropic', $agentInstance->provider()); - $this->assertEquals('claude-sonnet-4-5-20250929', $agentInstance->model()); + $this->assertEquals('codex', $agentInstance->provider()); + $this->assertEquals('gpt-5.3-codex', $agentInstance->model()); } public function test_tools_returned(): void { $agent = User::factory()->create([ 'type' => 'agent', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); $agentInstance = OpenCompanyAgent::for($agent, 'channel-1'); @@ -90,7 +102,7 @@ public function test_fake_prevents_real_api_calls(): void $agent = User::factory()->create([ 'type' => 'agent', - 'brain' => 'anthropic:claude-sonnet-4-5-20250929', + 'brain' => 'codex:gpt-5.3-codex', ]); $agentInstance = OpenCompanyAgent::for($agent, 'channel-1'); @@ -100,4 +112,39 @@ public function 
test_fake_prevents_real_api_calls(): void OpenCompanyAgent::assertPrompted(fn ($prompt) => $prompt->prompt === 'Test message'); } + + public function test_system_prompts_split_stable_and_runtime_context(): void + { + $agent = User::factory()->create([ + 'type' => 'agent', + 'brain' => 'codex:gpt-5.3-codex', + ]); + $channel = Channel::factory()->create(['type' => 'dm']); + + $task = Task::create([ + 'id' => (string) Str::uuid(), + 'workspace_id' => $this->workspace->id, + 'title' => 'Test task', + 'description' => 'Do the thing', + 'type' => Task::TYPE_CUSTOM, + 'status' => Task::STATUS_ACTIVE, + 'priority' => Task::PRIORITY_NORMAL, + 'source' => Task::SOURCE_CHAT, + 'agent_id' => $agent->id, + 'requester_id' => $agent->id, + 'channel_id' => $channel->id, + 'started_at' => now(), + ]); + + $agentInstance = OpenCompanyAgent::for($agent, $channel->id, $task->id); + $prepared = $agentInstance->preparePrompt('User request here.'); + $systemPrompts = $agentInstance->systemPrompts(); + + $this->assertSame('User request here.', $prepared); + $this->assertCount(2, $systemPrompts); + $this->assertStringNotContainsString('## Current Task', $systemPrompts[0]); + $this->assertStringContainsString('## Current Time', $systemPrompts[1]); + $this->assertStringContainsString('## Current Context', $systemPrompts[1]); + $this->assertStringContainsString('## Current Task', $systemPrompts[1]); + } } diff --git a/tests/Feature/Services/Memory/ConversationCompactionServiceTest.php b/tests/Feature/Services/Memory/ConversationCompactionServiceTest.php index 010a266..b641aa7 100644 --- a/tests/Feature/Services/Memory/ConversationCompactionServiceTest.php +++ b/tests/Feature/Services/Memory/ConversationCompactionServiceTest.php @@ -10,7 +10,6 @@ use App\Models\Message; use App\Models\User; use App\Services\Memory\ConversationCompactionService; -use App\Services\Memory\ModelContextRegistry; use Illuminate\Foundation\Testing\RefreshDatabase; use Illuminate\Support\Facades\Bus; use 
Laravel\Ai\Messages\UserMessage; @@ -34,6 +33,7 @@ protected function setUp(): void parent::setUp(); Bus::fake([IndexDocumentJob::class]); + config(['memory.compaction.memory_extraction.enabled' => false]); $this->agent = User::factory()->agent()->create([ 'name' => 'compact-agent', @@ -50,11 +50,11 @@ protected function setUp(): void ->andReturn(['provider' => 'openai', 'model' => 'gpt-4o']); $resolver->shouldReceive('resolveFromParts') ->andReturn(['provider' => 'openai', 'model' => 'gpt-4o']); + $resolver->shouldReceive('setWorkspaceId') + ->andReturnSelf(); - $this->service = new ConversationCompactionService( - app(ModelContextRegistry::class), - $resolver, - ); + $this->app->instance(DynamicProviderResolver::class, $resolver); + $this->service = app(ConversationCompactionService::class); } private function createMessages(int $count, ?string $channelId = null, ?string $authorId = null): void @@ -258,4 +258,26 @@ public function test_compact_increments_compaction_count(): void $this->assertNotNull($second, 'Second compaction should find messages after the compaction point'); $this->assertEquals(2, $second->compaction_count); } + + public function test_needs_compaction_returns_false_when_circuit_is_open(): void + { + ConversationSummary::create([ + 'channel_id' => $this->channel->id, + 'agent_id' => $this->agent->id, + 'summary' => 'Previous summary.', + 'workspace_id' => $this->workspace->id, + 'compaction_circuit_open_until' => now()->addMinutes(10), + ]); + + $messages = [new UserMessage(str_repeat('Word ', 2000))]; + + $result = $this->service->needsCompaction( + $this->channel->id, + $this->agent, + $messages, + 'Short system prompt.', + ); + + $this->assertFalse($result); + } } diff --git a/tests/Feature/Services/Memory/MemoryFlushServiceTest.php b/tests/Feature/Services/Memory/MemoryFlushServiceTest.php index 0ff3ee0..9babe1b 100644 --- a/tests/Feature/Services/Memory/MemoryFlushServiceTest.php +++ 
b/tests/Feature/Services/Memory/MemoryFlushServiceTest.php @@ -8,8 +8,8 @@ use App\Models\ConversationSummary; use App\Models\User; use App\Services\Memory\ConversationCompactionService; +use App\Services\Memory\ContextBudget; use App\Services\Memory\MemoryFlushService; -use App\Services\Memory\ModelContextRegistry; use Illuminate\Foundation\Testing\RefreshDatabase; use Illuminate\Support\Facades\Bus; use Laravel\Ai\Messages\UserMessage; @@ -35,6 +35,7 @@ protected function setUp(): void parent::setUp(); Bus::fake([IndexDocumentJob::class]); + config(['memory.compaction.memory_extraction.enabled' => false]); $this->agent = User::factory()->agent()->create([ 'name' => 'flush-agent', @@ -50,17 +51,12 @@ protected function setUp(): void ->andReturn(['provider' => 'openai', 'model' => 'gpt-4o']); $resolver->shouldReceive('resolveFromParts') ->andReturn(['provider' => 'openai', 'model' => 'gpt-4o']); + $resolver->shouldReceive('setWorkspaceId') + ->andReturnSelf(); - $this->compactionService = new ConversationCompactionService( - app(ModelContextRegistry::class), - $resolver, - ); - - $this->service = new MemoryFlushService( - $this->compactionService, - app(ModelContextRegistry::class), - $resolver, - ); + $this->app->instance(DynamicProviderResolver::class, $resolver); + $this->compactionService = app(ConversationCompactionService::class); + $this->service = app(MemoryFlushService::class); } /** diff --git a/tests/Feature/Services/Memory/ModelContextRegistryTest.php b/tests/Feature/Services/Memory/ModelContextRegistryTest.php index f610566..de37342 100644 --- a/tests/Feature/Services/Memory/ModelContextRegistryTest.php +++ b/tests/Feature/Services/Memory/ModelContextRegistryTest.php @@ -27,6 +27,13 @@ public function test_exact_match_returns_context_window(): void $this->assertEquals(128_000, $result); } + public function test_provider_aware_lookup_uses_prism_relay_metadata(): void + { + $result = $this->registry->getContextWindow('claude-sonnet-4-5-20250929', 
'anthropic'); + + $this->assertEquals(200_000, $result); + } + public function test_prefix_match_returns_longest(): void { // 'gpt-4o-mini-2024-07-18' starts with 'gpt-4o-mini' (128K) and also 'gpt-4o' (128K) @@ -60,6 +67,15 @@ public function test_user_override_takes_precedence(): void $this->assertEquals(64_000, $result); } + public function test_provider_specific_override_takes_precedence(): void + { + AppSetting::setValue('model_context_windows', ['anthropic:claude-sonnet-4-5-20250929' => 64_000], 'memory'); + + $result = $this->registry->getContextWindow('claude-sonnet-4-5-20250929', 'anthropic'); + + $this->assertEquals(64_000, $result); + } + public function test_levenshtein_rejects_distance_above_five(): void { // A model name that differs by more than 5 from all known models diff --git a/tests/Unit/ContextPrunerTest.php b/tests/Unit/ContextPrunerTest.php new file mode 100644 index 0000000..365d5a0 --- /dev/null +++ b/tests/Unit/ContextPrunerTest.php @@ -0,0 +1,58 @@ + 1, + 'memory.pruning.min_result_tokens' => 10, + 'memory.pruning.min_total_saved_tokens' => 10, + ]); + + $pruner = app(ContextPruner::class); + $messages = [ + new ToolResultMessage(new Collection([ + new ToolResult('call-1', 'read_file', ['path' => '/tmp/a.md'], str_repeat('old result ', 100)), + ])), + new ToolResultMessage(new Collection([ + new ToolResult('call-2', 'read_file', ['path' => '/tmp/b.md'], str_repeat('recent result ', 100)), + ])), + ]; + + $result = $pruner->prune($messages); + + $this->assertSame(1, $result['pruned_results']); + $this->assertStringContainsString('omitted from retry context', $result['messages'][0]->toolResults[0]->result); + $this->assertStringContainsString('recent result', $result['messages'][1]->toolResults[0]->result); + } + + public function test_does_not_prune_write_results(): void + { + config([ + 'memory.pruning.keep_recent_read_results' => 0, + 'memory.pruning.min_result_tokens' => 10, + 'memory.pruning.min_total_saved_tokens' => 10, + ]); + + 
$pruner = app(ContextPruner::class); + $messages = [ + new ToolResultMessage(new Collection([ + new ToolResult('call-1', 'send_channel_message', ['channelId' => 'chan-1'], str_repeat('sent ', 100)), + ])), + ]; + + $result = $pruner->prune($messages); + + $this->assertSame(0, $result['pruned_results']); + $this->assertStringNotContainsString('omitted from retry context', $result['messages'][0]->toolResults[0]->result); + } +} diff --git a/tests/Unit/OutputTruncatorTest.php b/tests/Unit/OutputTruncatorTest.php new file mode 100644 index 0000000..d878640 --- /dev/null +++ b/tests/Unit/OutputTruncatorTest.php @@ -0,0 +1,30 @@ +truncate($output, 'call-123'); + + $this->assertIsString($result); + $this->assertStringContainsString('[truncated - full output stored at storage:', $result); + Storage::disk('local')->assertExists('tool-results-test/'.now()->format('Y/m/d').'/tool_call-123.txt'); + } +} diff --git a/tests/Unit/PrismMessagesTest.php b/tests/Unit/PrismMessagesTest.php new file mode 100644 index 0000000..8ab5b73 --- /dev/null +++ b/tests/Unit/PrismMessagesTest.php @@ -0,0 +1,46 @@ + '/tmp/test.md'], + ), + ]), + ), + new ToolResultMessage(collect([ + new ToolResult( + id: 'call-1', + name: 'read_file', + arguments: ['path' => '/tmp/test.md'], + result: 'file contents', + ), + ])), + ])); + + $this->assertCount(2, $messages); + $this->assertInstanceOf(PrismAssistantMessage::class, $messages[0]); + $this->assertSame('read_file', $messages[0]->toolCalls[0]->name); + $this->assertInstanceOf(PrismToolResultMessage::class, $messages[1]); + $this->assertSame('file contents', $messages[1]->toolResults[0]->result); + } +} diff --git a/tests/Unit/PromptFrameBuilderTest.php b/tests/Unit/PromptFrameBuilderTest.php new file mode 100644 index 0000000..011f23a --- /dev/null +++ b/tests/Unit/PromptFrameBuilderTest.php @@ -0,0 +1,26 @@ +splitSections([ + ['label' => 'Header', 'content' => "Header\n"], + ['label' => 'Current Time', 'content' => "Time\n"], + ['label' => 'Apps', 
'content' => "Apps\n"], + ['label' => 'Current Task', 'content' => "Task\n"], + ]); + + $this->assertSame("Header\nApps\n", $frame['stable_prompt']); + $this->assertSame("Time\nTask\n", $frame['volatile_prompt']); + $this->assertCount(2, $frame['stable_breakdown']); + $this->assertCount(2, $frame['volatile_breakdown']); + } +} diff --git a/tests/Unit/ToolResultDeduplicatorTest.php b/tests/Unit/ToolResultDeduplicatorTest.php new file mode 100644 index 0000000..c063843 --- /dev/null +++ b/tests/Unit/ToolResultDeduplicatorTest.php @@ -0,0 +1,75 @@ + '/tmp/example.txt'], + result: 'hello world', + ), + ])), + new ToolResultMessage(new Collection([ + new ToolResult( + id: 'call-2', + name: 'read_file', + arguments: ['path' => '/tmp/example.txt'], + result: 'hello world', + ), + ])), + ]; + + $result = $deduplicator->deduplicate($messages); + + $this->assertSame(1, $result['deduplicated']); + $this->assertStringContainsString( + '[Superseded', + $result['messages'][0]->toolResults[0]->result + ); + $this->assertSame('hello world', $result['messages'][1]->toolResults[0]->result); + } + + public function test_does_not_supersede_write_tool_results(): void + { + $deduplicator = app(ToolResultDeduplicator::class); + + $messages = [ + new ToolResultMessage(new Collection([ + new ToolResult( + id: 'call-1', + name: 'send_channel_message', + arguments: ['channelId' => 'chan-1', 'message' => 'hello world'], + result: 'Message sent successfully.', + ), + ])), + new ToolResultMessage(new Collection([ + new ToolResult( + id: 'call-2', + name: 'send_channel_message', + arguments: ['channelId' => 'chan-1', 'message' => 'hello world'], + result: 'Message sent successfully.', + ), + ])), + ]; + + $result = $deduplicator->deduplicate($messages); + + $this->assertSame(0, $result['deduplicated']); + $this->assertSame('Message sent successfully.', $result['messages'][0]->toolResults[0]->result); + $this->assertSame('Message sent successfully.', 
$result['messages'][1]->toolResults[0]->result); + } +}